/*
   Unix SMB/CIFS implementation.
   byte range locking code
   Updated to handle range splits/merges.

   Copyright (C) Andrew Tridgell 1992-2000
   Copyright (C) Jeremy Allison 1992-2000

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/* This module implements a tdb based byte range locking service,
   replacing the fcntl() based byte range locking previously
   used. This allows us to provide the same semantics as NT */

#include "includes.h"
#include "system/filesys.h"
#include "librpc/gen_ndr/messaging.h"
#include "smbd/globals.h"
#include "dbwrap.h"
#include "serverid.h"

#undef DBGC_CLASS
#define DBGC_CLASS DBGC_LOCKING

#define ZERO_ZERO 0
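
/* When set to 1, ZERO_ZERO compiles in the special-case handling of
   zero-offset, zero-length (0/0) locks used below (brl_conflict1,
   lock_compare and the sorted lock list). It is off by default. */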

/* The open brlock.tdb database. */

static struct db_context *brlock_db;

/****************************************************************************
 Debug info at level 10 for lock struct.
****************************************************************************/

static void print_lock_struct(unsigned int i, struct lock_struct *pls)
{
        DEBUG(10,("[%u]: smblctx = %llu, tid = %u, pid = %s, ",
                        i,
                        (unsigned long long)pls->context.smblctx,
                        (unsigned int)pls->context.tid,
                        procid_str(talloc_tos(), &pls->context.pid) ));

        DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
                (double)pls->start,
                (double)pls->size,
                pls->fnum,
                lock_type_name(pls->lock_type),
                lock_flav_name(pls->lock_flav) ));
}

/****************************************************************************
 See if two locking contexts are equal.
****************************************************************************/

bool brl_same_context(const struct lock_context *ctx1,
                             const struct lock_context *ctx2)
{
        return (procid_equal(&ctx1->pid, &ctx2->pid) &&
                (ctx1->smblctx == ctx2->smblctx) &&
                (ctx1->tid == ctx2->tid));
}
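
/* Note on ownership: a lock's owner is the (pid, smblctx, tid) triple
   only - fnum is deliberately not part of the context, so two handles
   opened by the same process, SMB lock context and tree connect count
   as the same owner here. */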

/****************************************************************************
 See if lck1 and lck2 overlap.
****************************************************************************/

static bool brl_overlap(const struct lock_struct *lck1,
                        const struct lock_struct *lck2)
{
        /* XXX Remove for Win7 compatibility. */
        /* this extra check is not redundant - it copes with locks
           that go beyond the end of 64 bit file space */
        if (lck1->size != 0 &&
            lck1->start == lck2->start &&
            lck1->size == lck2->size) {
                return True;
        }

        if (lck1->start >= (lck2->start+lck2->size) ||
            lck2->start >= (lck1->start+lck1->size)) {
                return False;
        }
        return True;
}
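
/* Worked example of the range test above (a sketch): start=0,size=10
   covers bytes 0..9 and start=10,size=5 covers bytes 10..14. Since
   10 >= 0 + 10, the test fires and the two are reported as
   non-overlapping - range ends are treated as half-open. */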

/****************************************************************************
 See if lock2 can be added when lock1 is in place.
****************************************************************************/

static bool brl_conflict(const struct lock_struct *lck1,
                         const struct lock_struct *lck2)
{
        /* Ignore PENDING locks. */
        if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;

        /* Read locks never conflict. */
        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
        }

        /* A READ lock can stack on top of a WRITE lock if they have the same
         * context & fnum. */
        if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
            brl_same_context(&lck1->context, &lck2->context) &&
            lck1->fnum == lck2->fnum) {
                return False;
        }

        return brl_overlap(lck1, lck2);
}
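
/* Example of the stacking rule (a sketch): a handle holding a WRITE
   lock on bytes 0..9 may take a READ lock on the same range through
   the same fnum and context without conflict; the identical request
   from any other handle or owner conflicts. */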

/****************************************************************************
 See if lock2 can be added when lock1 is in place - when both locks are POSIX
 flavour. POSIX locks ignore fnum - they only care about dev/ino which we
 know already match.
****************************************************************************/

static bool brl_conflict_posix(const struct lock_struct *lck1,
                                const struct lock_struct *lck2)
{
#if defined(DEVELOPER)
        SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
        SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
#endif

        /* Ignore PENDING locks. */
        if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;

        /* Read locks never conflict. */
        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
        }

        /* Locks on the same context don't conflict. Ignore fnum. */
        if (brl_same_context(&lck1->context, &lck2->context)) {
                return False;
        }

        /* One is read, the other write, or the context is different,
           do they overlap ? */
        return brl_overlap(lck1, lck2);
}

#if ZERO_ZERO
static bool brl_conflict1(const struct lock_struct *lck1,
                         const struct lock_struct *lck2)
{
        if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;

        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
        }

        if (brl_same_context(&lck1->context, &lck2->context) &&
            lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
                return False;
        }

        if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
                return True;
        }

        if (lck1->start >= (lck2->start + lck2->size) ||
            lck2->start >= (lck1->start + lck1->size)) {
                return False;
        }

        return True;
}
#endif

/****************************************************************************
 Check to see if this lock conflicts, but ignore our own locks on the
 same fnum only. This is the read/write lock check code path.
 This is never used in the POSIX lock case.
****************************************************************************/

static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
{
        if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;

        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
                return False;

        /* POSIX flavour locks never conflict here - this is only called
           in the read/write path. */

        if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
                return False;

        /*
         * Incoming WRITE locks conflict with existing READ locks even
         * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
         */

        if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
                if (brl_same_context(&lck1->context, &lck2->context) &&
                                        lck1->fnum == lck2->fnum)
                        return False;
        }

        return brl_overlap(lck1, lck2);
}

/****************************************************************************
 Check if an unlock overlaps a pending lock.
****************************************************************************/

static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
{
        if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
                return True;
        if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
                return True;
        return False;
}
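
/* Note the second test is inclusive of the pending lock's end offset,
   so this errs towards reporting an overlap. Waking a waiter that
   still cannot get its lock is harmless (it simply retries and fails),
   while a missed wakeup would stall it. */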

/****************************************************************************
 Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
 is the same as this one and changes its error code. I wonder if any
 app depends on this ?
****************************************************************************/

NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock)
{
        if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
                /* amazing the little things you learn with a test
                   suite. Locks beyond this offset (as a 64 bit
                   number!) always generate the conflict error code,
                   unless the top bit is set */
                if (!blocking_lock) {
                        fsp->last_lock_failure = *lock;
                }
                return NT_STATUS_FILE_LOCK_CONFLICT;
        }

        if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
                        lock->context.tid == fsp->last_lock_failure.context.tid &&
                        lock->fnum == fsp->last_lock_failure.fnum &&
                        lock->start == fsp->last_lock_failure.start) {
                return NT_STATUS_FILE_LOCK_CONFLICT;
        }

        if (!blocking_lock) {
                fsp->last_lock_failure = *lock;
        }
        return NT_STATUS_LOCK_NOT_GRANTED;
}

/****************************************************************************
 Open up the brlock.tdb database.
****************************************************************************/

void brl_init(bool read_only)
{
        int tdb_flags;

        if (brlock_db) {
                return;
        }

        tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH;

        if (!lp_clustering()) {
                /*
                 * We can't use the SEQNUM trick to cache brlock
                 * entries in the clustering case because ctdb seqnum
                 * propagation has a delay.
                 */
                tdb_flags |= TDB_SEQNUM;
        }

        brlock_db = db_open(NULL, lock_path("brlock.tdb"),
                            lp_open_files_db_hash_size(), tdb_flags,
                            read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
        if (!brlock_db) {
                DEBUG(0,("Failed to open byte range locking database %s\n",
                        lock_path("brlock.tdb")));
                return;
        }
}
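
/* With TDB_SEQNUM, tdb bumps a sequence number on each modification,
   letting readers cheaply detect an unchanged database and cache
   brlock records; per the comment above, that trick is only safe
   when not clustering. */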

/****************************************************************************
 Close down the brlock.tdb database.
****************************************************************************/

void brl_shutdown(void)
{
        TALLOC_FREE(brlock_db);
}

#if ZERO_ZERO
/****************************************************************************
 Compare two locks for sorting.
****************************************************************************/

static int lock_compare(const struct lock_struct *lck1,
                         const struct lock_struct *lck2)
{
        if (lck1->start != lck2->start) {
                return (lck1->start - lck2->start);
        }
        if (lck2->size != lck1->size) {
                return ((int)lck1->size - (int)lck2->size);
        }
        return 0;
}
#endif

/****************************************************************************
 Lock a range of bytes - Windows lock semantics.
****************************************************************************/

NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
    struct lock_struct *plock, bool blocking_lock)
{
        unsigned int i;
        files_struct *fsp = br_lck->fsp;
        struct lock_struct *locks = br_lck->lock_data;
        NTSTATUS status;

        SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);

        if ((plock->start + plock->size - 1 < plock->start) &&
                        plock->size != 0) {
                return NT_STATUS_INVALID_LOCK_RANGE;
        }

        for (i=0; i < br_lck->num_locks; i++) {
                /* Do any Windows or POSIX locks conflict ? */
                if (brl_conflict(&locks[i], plock)) {
                        /* Remember who blocked us. */
                        plock->context.smblctx = locks[i].context.smblctx;
                        return brl_lock_failed(fsp,plock,blocking_lock);
                }
#if ZERO_ZERO
                if (plock->start == 0 && plock->size == 0 &&
                                locks[i].size == 0) {
                        break;
                }
#endif
        }

        if (!IS_PENDING_LOCK(plock->lock_type)) {
                contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
        }

        /* We can get the Windows lock, now see if it needs to
           be mapped into a lower level POSIX one, and if so can
           we get it ? */

        if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
                int errno_ret;
                if (!set_posix_lock_windows_flavour(fsp,
                                plock->start,
                                plock->size,
                                plock->lock_type,
                                &plock->context,
                                locks,
                                br_lck->num_locks,
                                &errno_ret)) {

                        /* We don't know who blocked us. */
                        plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;

                        if (errno_ret == EACCES || errno_ret == EAGAIN) {
                                status = NT_STATUS_FILE_LOCK_CONFLICT;
                                goto fail;
                        } else {
                                status = map_nt_error_from_unix(errno);
                                goto fail;
                        }
                }
        }

        /* no conflicts - add it to the list of locks */
        locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
        if (!locks) {
                status = NT_STATUS_NO_MEMORY;
                goto fail;
        }

        memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
        br_lck->num_locks += 1;
        br_lck->lock_data = locks;
        br_lck->modified = True;

        return NT_STATUS_OK;
 fail:
        if (!IS_PENDING_LOCK(plock->lock_type)) {
                contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
        }
        return status;
}
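
/* The sequence above is: check the proposed lock against every entry in
   the tdb record, then (for real, non-pending locks) try to map it to a
   system-level POSIX lock if "posix locking" is enabled, and only then
   append it to the record. The fail: path undoes the oplock contention
   taken before the POSIX mapping attempt. */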

/****************************************************************************
 Cope with POSIX range splits and merges.
****************************************************************************/

static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,       /* Output array. */
                                                struct lock_struct *ex,         /* existing lock. */
                                                struct lock_struct *plock)      /* proposed lock. */
{
        bool lock_types_differ = (ex->lock_type != plock->lock_type);

        /* We can't merge non-conflicting locks on different context - ignore fnum. */

        if (!brl_same_context(&ex->context, &plock->context)) {
                /* Just copy. */
                memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                return 1;
        }

        /* We now know we have the same context. */

        /* Did we overlap ? */

/*********************************************
                                        +---------+
                                        | ex      |
                                        +---------+
                         +-------+
                         | plock |
                         +-------+
OR....
        +---------+
        |  ex     |
        +---------+
**********************************************/

        if ( (ex->start > (plock->start + plock->size)) ||
                (plock->start > (ex->start + ex->size))) {

                /* No overlap with this lock - copy existing. */

                memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                return 1;
        }

/*********************************************
        +---------------------------+
        |          ex               |
        +---------------------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+
OR
             +---------------+
             |       ex      |
             +---------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+

**********************************************/

        if ( (ex->start >= plock->start) &&
                (ex->start + ex->size <= plock->start + plock->size) ) {

                /* Replace - discard existing lock. */

                return 0;
        }

/*********************************************
Adjacent after.
                        +-------+
                        |  ex   |
                        +-------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

        if (plock->start + plock->size == ex->start) {

                /* If the lock types are the same, we merge, if different, we
                   add the remainder of the old lock. */

                if (lock_types_differ) {
                        /* Add existing. */
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        return 1;
                } else {
                        /* Merge - adjust incoming lock as we may have more
                         * merging to come. */
                        plock->size += ex->size;
                        return 0;
                }
        }

/*********************************************
Adjacent before.
        +-------+
        |  ex   |
        +-------+
                +---------------+
                |   plock       |
                +---------------+
BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

        if (ex->start + ex->size == plock->start) {

                /* If the lock types are the same, we merge, if different, we
                   add the existing lock. */

                if (lock_types_differ) {
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        return 1;
                } else {
                        /* Merge - adjust incoming lock as we may have more
                         * merging to come. */
                        plock->start = ex->start;
                        plock->size += ex->size;
                        return 0;
                }
        }

/*********************************************
Overlap after.
        +-----------------------+
        |          ex           |
        +-----------------------+
        +---------------+
        |   plock       |
        +---------------+
OR
               +----------------+
               |       ex       |
               +----------------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

        if ( (ex->start >= plock->start) &&
                (ex->start <= plock->start + plock->size) &&
                (ex->start + ex->size > plock->start + plock->size) ) {

                /* If the lock types are the same, we merge, if different, we
                   add the remainder of the old lock. */

                if (lock_types_differ) {
                        /* Add remaining existing. */
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        /* Adjust existing start and size. */
                        lck_arr[0].start = plock->start + plock->size;
                        lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
                        return 1;
                } else {
                        /* Merge - adjust incoming lock as we may have more
                         * merging to come. */
                        plock->size += (ex->start + ex->size) - (plock->start + plock->size);
                        return 0;
                }
        }

/*********************************************
Overlap before.
        +-----------------------+
        |  ex                   |
        +-----------------------+
                +---------------+
                |   plock       |
                +---------------+
OR
        +-------------+
        |  ex         |
        +-------------+
                +---------------+
                |   plock       |
                +---------------+

BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

        if ( (ex->start < plock->start) &&
                        (ex->start + ex->size >= plock->start) &&
                        (ex->start + ex->size <= plock->start + plock->size) ) {

                /* If the lock types are the same, we merge, if different, we
                   add the truncated old lock. */

                if (lock_types_differ) {
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        /* Adjust existing size. */
                        lck_arr[0].size = plock->start - ex->start;
                        return 1;
                } else {
                        /* Merge - adjust incoming lock as we may have more
                         * merging to come. MUST ADJUST plock SIZE FIRST ! */
                        plock->size += (plock->start - ex->start);
                        plock->start = ex->start;
                        return 0;
                }
        }

/*********************************************
Complete overlap.
        +---------------------------+
        |        ex                 |
        +---------------------------+
                +---------+
                |  plock  |
                +---------+
BECOMES.....
        +-------+---------+---------+
        | ex    |  plock  | ex      | - different lock types.
        +-------+---------+---------+
OR
        +---------------------------+
        |        plock              | - same lock type.
        +---------------------------+
**********************************************/

        if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {

                if (lock_types_differ) {

                        /* We have to split ex into two locks here. */

                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));

                        /* Adjust first existing size. */
                        lck_arr[0].size = plock->start - ex->start;

                        /* Adjust second existing start and size. */
                        lck_arr[1].start = plock->start + plock->size;
                        lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
                        return 2;
                } else {
                        /* Just eat the existing locks, merge them into plock. */
                        plock->start = ex->start;
                        plock->size = ex->size;
                        return 0;
                }
        }

        /* Never get here. */
        smb_panic("brlock_posix_split_merge");
        /* Notreached. */

        /* Keep some compilers happy. */
        return 0;
}
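
/* Recap of the return convention (not new behaviour): 0 means ex was
   swallowed - plock was extended/adjusted in place and ex must not be
   copied; 1 means exactly one lock (ex, possibly truncated) was written
   to lck_arr; 2 means ex was split around plock into lck_arr[0] and
   lck_arr[1]. Hence callers allocate num_locks + 2 entries: at most one
   existing range can be split in two, plus the new lock itself. */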

/****************************************************************************
 Lock a range of bytes - POSIX lock semantics.
 We must cope with range splits and merges.
****************************************************************************/

static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
                               struct byte_range_lock *br_lck,
                               struct lock_struct *plock)
{
        unsigned int i, count, posix_count;
        struct lock_struct *locks = br_lck->lock_data;
        struct lock_struct *tp;
        bool signal_pending_read = False;
        bool break_oplocks = false;
        NTSTATUS status;

        /* No zero-zero locks for POSIX. */
        if (plock->start == 0 && plock->size == 0) {
                return NT_STATUS_INVALID_PARAMETER;
        }

        /* Don't allow 64-bit lock wrap. */
        if (plock->start + plock->size - 1 < plock->start) {
                return NT_STATUS_INVALID_PARAMETER;
        }

        /* The worst case scenario here is we have to split an
           existing POSIX lock range into two, and add our lock,
           so we need at most 2 more entries. */

        tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
        if (!tp) {
                return NT_STATUS_NO_MEMORY;
        }

        count = posix_count = 0;

        for (i=0; i < br_lck->num_locks; i++) {
                struct lock_struct *curr_lock = &locks[i];

                /* If we have a pending read lock, a lock downgrade should
                   trigger a lock re-evaluation. */
                if (curr_lock->lock_type == PENDING_READ_LOCK &&
                                brl_pending_overlap(plock, curr_lock)) {
                        signal_pending_read = True;
                }

                if (curr_lock->lock_flav == WINDOWS_LOCK) {
                        /* Do any Windows flavour locks conflict ? */
                        if (brl_conflict(curr_lock, plock)) {
                                /* No games with error messages. */
                                SAFE_FREE(tp);
                                /* Remember who blocked us. */
                                plock->context.smblctx = curr_lock->context.smblctx;
                                return NT_STATUS_FILE_LOCK_CONFLICT;
                        }
                        /* Just copy the Windows lock into the new array. */
                        memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
                        count++;
                } else {
                        unsigned int tmp_count = 0;

                        /* POSIX conflict semantics are different. */
                        if (brl_conflict_posix(curr_lock, plock)) {
                                /* Can't block ourselves with POSIX locks. */
                                /* No games with error messages. */
                                SAFE_FREE(tp);
                                /* Remember who blocked us. */
                                plock->context.smblctx = curr_lock->context.smblctx;
                                return NT_STATUS_FILE_LOCK_CONFLICT;
                        }

                        /* Work out overlaps. */
                        tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
                        posix_count += tmp_count;
                        count += tmp_count;
                }
        }

        /*
         * Break oplocks while we hold a brl. Since lock() and unlock() calls
         * are not symmetric with POSIX semantics, we cannot guarantee our
         * contend_level2_oplocks_begin/end calls will be acquired and
         * released one-for-one as with Windows semantics. Therefore we only
         * call contend_level2_oplocks_begin if this is the first POSIX brl on
         * the file.
         */
        break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
                         posix_count == 0);
        if (break_oplocks) {
                contend_level2_oplocks_begin(br_lck->fsp,
                                             LEVEL2_CONTEND_POSIX_BRL);
        }

        /* Try and add the lock in order, sorted by lock start. */
        for (i=0; i < count; i++) {
                struct lock_struct *curr_lock = &tp[i];

                if (curr_lock->start <= plock->start) {
                        continue;
                }
                /* First lock that starts past plock - insert before it. */
                break;
        }

        if (i < count) {
                memmove(&tp[i+1], &tp[i],
                        (count - i)*sizeof(struct lock_struct));
        }
        memcpy(&tp[i], plock, sizeof(struct lock_struct));
        count++;

        /* We can get the POSIX lock, now see if it needs to
           be mapped into a lower level POSIX one, and if so can
           we get it ? */

        if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
                int errno_ret;

                /* The lower layer just needs to attempt to
                   get the system POSIX lock. We've weeded out
                   any conflicts above. */

                if (!set_posix_lock_posix_flavour(br_lck->fsp,
                                plock->start,
                                plock->size,
                                plock->lock_type,
                                &errno_ret)) {

                        /* We don't know who blocked us. */
                        plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;

                        if (errno_ret == EACCES || errno_ret == EAGAIN) {
                                SAFE_FREE(tp);
                                status = NT_STATUS_FILE_LOCK_CONFLICT;
                                goto fail;
                        } else {
                                SAFE_FREE(tp);
                                status = map_nt_error_from_unix(errno);
                                goto fail;
                        }
                }
        }

        /* If we didn't use all the allocated size,
         * Realloc so we don't leak entries per lock call. */
        if (count < br_lck->num_locks + 2) {
                tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
                if (!tp) {
                        status = NT_STATUS_NO_MEMORY;
                        goto fail;
                }
        }

        br_lck->num_locks = count;
        SAFE_FREE(br_lck->lock_data);
        br_lck->lock_data = tp;
        locks = tp;
        br_lck->modified = True;

        /* A successful downgrade from write to read lock can trigger a lock
           re-evaluation where waiting readers can now proceed. */

        if (signal_pending_read) {
                /* Send unlock messages to any pending read waiters that overlap. */
                for (i=0; i < br_lck->num_locks; i++) {
                        struct lock_struct *pend_lock = &locks[i];

                        /* Ignore non-pending locks. */
                        if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
                                continue;
                        }

                        if (pend_lock->lock_type == PENDING_READ_LOCK &&
                                        brl_pending_overlap(plock, pend_lock)) {
                                DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
                                        procid_str_static(&pend_lock->context.pid )));

                                messaging_send(msg_ctx, pend_lock->context.pid,
                                               MSG_SMB_UNLOCK, &data_blob_null);
                        }
                }
        }

        return NT_STATUS_OK;
 fail:
        if (break_oplocks) {
                contend_level2_oplocks_end(br_lck->fsp,
                                           LEVEL2_CONTEND_POSIX_BRL);
        }
        return status;
}
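
/* posix_count tracks how many POSIX-flavour entries the merge loop kept
   for this record; zero means plock is effectively the first POSIX brl
   on the file, which is the one case where level2 oplocks are contended
   (see the comment in the function body). */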

NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
                                       struct byte_range_lock *br_lck,
                                       struct lock_struct *plock,
                                       bool blocking_lock,
                                       struct blocking_lock_record *blr)
{
        VFS_FIND(brl_lock_windows);
        return handle->fns->brl_lock_windows(handle, br_lck, plock,
                                             blocking_lock, blr);
}

/****************************************************************************
 Lock a range of bytes.
****************************************************************************/

NTSTATUS brl_lock(struct messaging_context *msg_ctx,
                struct byte_range_lock *br_lck,
                uint64_t smblctx,
                struct server_id pid,
                br_off start,
                br_off size,
                enum brl_type lock_type,
                enum brl_flavour lock_flav,
                bool blocking_lock,
                uint64_t *psmblctx,
                struct blocking_lock_record *blr)
{
        NTSTATUS ret;
        struct lock_struct lock;

#if !ZERO_ZERO
        if (start == 0 && size == 0) {
                DEBUG(0,("client sent 0/0 lock - please report this\n"));
        }
#endif

#ifdef DEVELOPER
        /* Quieten valgrind on test. */
        memset(&lock, '\0', sizeof(lock));
#endif

        lock.context.smblctx = smblctx;
        lock.context.pid = pid;
        lock.context.tid = br_lck->fsp->conn->cnum;
        lock.start = start;
        lock.size = size;
        lock.fnum = br_lck->fsp->fnum;
        lock.lock_type = lock_type;
        lock.lock_flav = lock_flav;

        if (lock_flav == WINDOWS_LOCK) {
                ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
                    &lock, blocking_lock, blr);
        } else {
                ret = brl_lock_posix(msg_ctx, br_lck, &lock);
        }

#if ZERO_ZERO
        /* sort the lock list */
        TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare);
#endif

        /* If we're returning an error, return who blocked us. */
        if (!NT_STATUS_IS_OK(ret) && psmblctx) {
                *psmblctx = lock.context.smblctx;
        }
        return ret;
}
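
/*
 * Typical call pattern (a sketch, not code from this file): the smbd
 * locking layer builds a byte_range_lock for the fsp and then calls
 *
 *     status = brl_lock(msg_ctx, br_lck, smblctx, procid, start, size,
 *                       WRITE_LOCK, WINDOWS_LOCK, blocking_lock,
 *                       &blocked_smblctx, blr);
 *
 * On failure *psmblctx names the blocking owner, and a blocking lock
 * may be queued to retry when an MSG_SMB_UNLOCK message arrives.
 */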

/****************************************************************************
 Unlock a range of bytes - Windows semantics.
****************************************************************************/

bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
                               struct byte_range_lock *br_lck,
                               const struct lock_struct *plock)
{
        unsigned int i, j;
        struct lock_struct *locks = br_lck->lock_data;
        enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */

        SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);

#if ZERO_ZERO
        /* Delete write locks by preference... The lock list
           is sorted in the zero zero case. */

        for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];

                if (lock->lock_type == WRITE_LOCK &&
                    brl_same_context(&lock->context, &plock->context) &&
                    lock->fnum == plock->fnum &&
                    lock->lock_flav == WINDOWS_LOCK &&
                    lock->start == plock->start &&
                    lock->size == plock->size) {

                        /* found it - delete it */
                        deleted_lock_type = lock->lock_type;
                        break;
                }
        }

        if (i != br_lck->num_locks) {
                /* We found it - don't search again. */
                goto unlock_continue;
        }
#endif

        for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];

                if (IS_PENDING_LOCK(lock->lock_type)) {
                        continue;
                }

                /* Only remove our own locks that match in start, size, and flavour. */
                if (brl_same_context(&lock->context, &plock->context) &&
                                        lock->fnum == plock->fnum &&
                                        lock->lock_flav == WINDOWS_LOCK &&
                                        lock->start == plock->start &&
                                        lock->size == plock->size ) {
                        deleted_lock_type = lock->lock_type;
                        break;
                }
        }

        if (i == br_lck->num_locks) {
                /* we didn't find it */
                return False;
        }

#if ZERO_ZERO
  unlock_continue:
#endif

        /* Actually delete the lock. */
        if (i < br_lck->num_locks - 1) {
                memmove(&locks[i], &locks[i+1],
                        sizeof(*locks)*((br_lck->num_locks-1) - i));
        }

        br_lck->num_locks -= 1;
        br_lck->modified = True;

        /* Unlock the underlying POSIX regions. */
        if(lp_posix_locking(br_lck->fsp->conn->params)) {
                release_posix_lock_windows_flavour(br_lck->fsp,
                                plock->start,
                                plock->size,
                                deleted_lock_type,
                                &plock->context,
                                locks,
                                br_lck->num_locks);
        }

        /* Send unlock messages to any pending waiters that overlap. */
        for (j=0; j < br_lck->num_locks; j++) {
                struct lock_struct *pend_lock = &locks[j];

                /* Ignore non-pending locks. */
                if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
                        continue;
                }

                /* We could send specific lock info here... */
                if (brl_pending_overlap(plock, pend_lock)) {
                        DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
                                procid_str_static(&pend_lock->context.pid )));

                        messaging_send(msg_ctx, pend_lock->context.pid,
                                       MSG_SMB_UNLOCK, &data_blob_null);
                }
        }

        contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
        return True;
}
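
/* Note: exactly one matching entry is removed per unlock call - the scan
   breaks at the first match - so stacked locks (e.g. a READ stacked on a
   WRITE by the same handle) must be unlocked with one call per lock. */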

/****************************************************************************
 Unlock a range of bytes - POSIX semantics.
****************************************************************************/

static bool brl_unlock_posix(struct messaging_context *msg_ctx,
                             struct byte_range_lock *br_lck,
                             struct lock_struct *plock)
{
        unsigned int i, j, count;
        struct lock_struct *tp;
        struct lock_struct *locks = br_lck->lock_data;
        bool overlap_found = False;

        /* No zero-zero locks for POSIX. */
        if (plock->start == 0 && plock->size == 0) {
                return False;
        }

        /* Don't allow 64-bit lock wrap. */
        if (plock->start + plock->size < plock->start ||
                        plock->start + plock->size < plock->size) {
                DEBUG(10,("brl_unlock_posix: lock wrap\n"));
                return False;
        }

        /* The worst case scenario here is we have to split an
           existing POSIX lock range into two, so we need at most
           1 more entry. */

        tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
        if (!tp) {
                DEBUG(10,("brl_unlock_posix: malloc fail\n"));
                return False;
        }

        count = 0;
        for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];
                unsigned int tmp_count;

                /* Only remove our own locks - ignore fnum. */
                if (IS_PENDING_LOCK(lock->lock_type) ||
                                !brl_same_context(&lock->context, &plock->context)) {
                        memcpy(&tp[count], lock, sizeof(struct lock_struct));
                        count++;
                        continue;
                }

                if (lock->lock_flav == WINDOWS_LOCK) {
                        /* Do any Windows flavour locks conflict ? */
                        if (brl_conflict(lock, plock)) {
                                SAFE_FREE(tp);
                                return false;
                        }
                        /* Just copy the Windows lock into the new array. */
                        memcpy(&tp[count], lock, sizeof(struct lock_struct));
                        count++;
                        continue;
                }

                /* Work out overlaps. */
                tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);

                if (tmp_count == 0) {
                        /* plock overlapped the existing lock completely,
                           or replaced it. Don't copy the existing lock. */
                        overlap_found = true;
                } else if (tmp_count == 1) {
                        /* Either no overlap, (simple copy of existing lock) or
                         * an overlap of an existing lock. */
                        /* If the lock changed size, we had an overlap. */
                        if (tp[count].size != lock->size) {
                                overlap_found = true;
                        }
                        count += tmp_count;
                } else if (tmp_count == 2) {
                        /* We split a lock range in two. */
                        overlap_found = true;
                        count += tmp_count;

                        /* Optimisation... */
                        /* We know we're finished here as we can't overlap any
                           more POSIX locks. Copy the rest of the lock array. */

                        if (i < br_lck->num_locks - 1) {
                                memcpy(&tp[count], &locks[i+1],
                                        sizeof(*locks)*((br_lck->num_locks-1) - i));
                                count += ((br_lck->num_locks-1) - i);
                        }
                        break;
                }

        }

        if (!overlap_found) {
                /* Just ignore - no change. */
                SAFE_FREE(tp);
                DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
                return True;
        }

        /* Unlock any POSIX regions. */
        if(lp_posix_locking(br_lck->fsp->conn->params)) {
                release_posix_lock_posix_flavour(br_lck->fsp,
                                                plock->start,
                                                plock->size,
                                                &plock->context,
                                                tp,
                                                count);
        }

        /* Realloc so we don't leak entries per unlock call. */
        if (count) {
                tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
                if (!tp) {
                        DEBUG(10,("brl_unlock_posix: realloc fail\n"));
                        return False;
                }
        } else {
                /* We deleted the last lock. */
                SAFE_FREE(tp);
                tp = NULL;
        }

        contend_level2_oplocks_end(br_lck->fsp,
                                   LEVEL2_CONTEND_POSIX_BRL);

        br_lck->num_locks = count;
        SAFE_FREE(br_lck->lock_data);
        locks = tp;
        br_lck->lock_data = tp;
        br_lck->modified = True;

        /* Send unlock messages to any pending waiters that overlap. */

        for (j=0; j < br_lck->num_locks; j++) {
                struct lock_struct *pend_lock = &locks[j];

                /* Ignore non-pending locks. */
                if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
                        continue;
                }

                /* We could send specific lock info here... */
                if (brl_pending_overlap(plock, pend_lock)) {
                        DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
                                procid_str_static(&pend_lock->context.pid )));

                        messaging_send(msg_ctx, pend_lock->context.pid,
                                       MSG_SMB_UNLOCK, &data_blob_null);
                }
        }

        return True;
}

bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
                                     struct messaging_context *msg_ctx,
                                     struct byte_range_lock *br_lck,
                                     const struct lock_struct *plock)
{
        VFS_FIND(brl_unlock_windows);
        return handle->fns->brl_unlock_windows(handle, msg_ctx, br_lck, plock);
}

/****************************************************************************
 Unlock a range of bytes.
****************************************************************************/

bool brl_unlock(struct messaging_context *msg_ctx,
                struct byte_range_lock *br_lck,
                uint64_t smblctx,
                struct server_id pid,
                br_off start,
                br_off size,
                enum brl_flavour lock_flav)
{
        struct lock_struct lock;

        lock.context.smblctx = smblctx;
        lock.context.pid = pid;
        lock.context.tid = br_lck->fsp->conn->cnum;
        lock.start = start;
        lock.size = size;
        lock.fnum = br_lck->fsp->fnum;
        lock.lock_type = UNLOCK_LOCK;
        lock.lock_flav = lock_flav;

        if (lock_flav == WINDOWS_LOCK) {
                return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
                    br_lck, &lock);
        } else {
                return brl_unlock_posix(msg_ctx, br_lck, &lock);
        }
}

/****************************************************************************
 Test if we could add a lock if we wanted to.
 Returns True if the region required is currently unlocked, False if locked.
****************************************************************************/

bool brl_locktest(struct byte_range_lock *br_lck,
                uint64_t smblctx,
                struct server_id pid,
                br_off start,
                br_off size,
                enum brl_type lock_type,
                enum brl_flavour lock_flav)
{
        bool ret = True;
        unsigned int i;
        struct lock_struct lock;
        const struct lock_struct *locks = br_lck->lock_data;
        files_struct *fsp = br_lck->fsp;

        lock.context.smblctx = smblctx;
        lock.context.pid = pid;
        lock.context.tid = br_lck->fsp->conn->cnum;
        lock.start = start;
        lock.size = size;
        lock.fnum = fsp->fnum;
        lock.lock_type = lock_type;
        lock.lock_flav = lock_flav;

        /* Make sure existing locks don't conflict */
        for (i=0; i < br_lck->num_locks; i++) {
                /*
                 * Our own locks don't conflict.
                 */
                if (brl_conflict_other(&locks[i], &lock)) {
                        return False;
                }
        }

        /*
         * There is no lock held by an SMB daemon, check to
         * see if there is a POSIX lock from a UNIX or NFS process.
         * This only conflicts with Windows locks, not POSIX locks.
         */

        if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
                ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);

                DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
                        (double)start, (double)size, ret ? "locked" : "unlocked",
                        fsp->fnum, fsp_str_dbg(fsp)));

                /* We need to return the inverse of is_posix_locked. */
                ret = !ret;
        }

        /* no conflicts - we could have added it */
        return ret;
}
1325
1326 /****************************************************************************
1327  Query for existing locks.
1328 ****************************************************************************/
1329
1330 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1331                 uint64_t *psmblctx,
1332                 struct server_id pid,
1333                 br_off *pstart,
1334                 br_off *psize, 
1335                 enum brl_type *plock_type,
1336                 enum brl_flavour lock_flav)
1337 {
1338         unsigned int i;
1339         struct lock_struct lock;
1340         const struct lock_struct *locks = br_lck->lock_data;
1341         files_struct *fsp = br_lck->fsp;
1342
1343         lock.context.smblctx = *psmblctx;
1344         lock.context.pid = pid;
1345         lock.context.tid = br_lck->fsp->conn->cnum;
1346         lock.start = *pstart;
1347         lock.size = *psize;
1348         lock.fnum = fsp->fnum;
1349         lock.lock_type = *plock_type;
1350         lock.lock_flav = lock_flav;
1351
1352         /* Make sure existing locks don't conflict */
1353         for (i=0; i < br_lck->num_locks; i++) {
1354                 const struct lock_struct *exlock = &locks[i];
1355                 bool conflict = False;
1356
1357                 if (exlock->lock_flav == WINDOWS_LOCK) {
1358                         conflict = brl_conflict(exlock, &lock);
1359                 } else {        
1360                         conflict = brl_conflict_posix(exlock, &lock);
1361                 }
1362
1363                 if (conflict) {
1364                         *psmblctx = exlock->context.smblctx;
1365                         *pstart = exlock->start;
1366                         *psize = exlock->size;
1367                         *plock_type = exlock->lock_type;
1368                         return NT_STATUS_LOCK_NOT_GRANTED;
1369                 }
1370         }
1371
1372         /*
1373          * There is no lock held by an SMB daemon, check to
1374          * see if there is a POSIX lock from a UNIX or NFS process.
1375          */
1376
1377         if(lp_posix_locking(fsp->conn->params)) {
1378                 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1379
1380                 DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1381                         (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
1382                         fsp->fnum, fsp_str_dbg(fsp)));
1383
1384                 if (ret) {
1385                         /* Hmmm. No clue what to set smblctx to - use -1. */
1386                         *psmblctx = 0xFFFFFFFFFFFFFFFFLL;
1387                         return NT_STATUS_LOCK_NOT_GRANTED;
1388                 }
1389         }
1390
1391         return NT_STATUS_OK;
1392 }
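
/*
 * Illustrative sketch (not part of the original source): answering a
 * lock-query request. The in/out parameters are overwritten with the
 * details of the conflicting lock when NT_STATUS_LOCK_NOT_GRANTED is
 * returned. smblctx, start, size and lock_type here are
 * caller-supplied assumptions.
 *
 *      NTSTATUS status = brl_lockquery(br_lck, &smblctx,
 *                              sconn_server_id(fsp->conn->sconn),
 *                              &start, &size,
 *                              &lock_type, WINDOWS_LOCK);
 *      if (NT_STATUS_EQUAL(status, NT_STATUS_LOCK_NOT_GRANTED)) {
 *              ... report smblctx/start/size/lock_type to the client ...
 *      }
 */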
1393
1394
1395 bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
1396                                      struct byte_range_lock *br_lck,
1397                                      struct lock_struct *plock,
1398                                      struct blocking_lock_record *blr)
1399 {
1400         VFS_FIND(brl_cancel_windows);
1401         return handle->fns->brl_cancel_windows(handle, br_lck, plock, blr);
1402 }
1403
1404 /****************************************************************************
1405  Remove a particular pending lock.
1406 ****************************************************************************/
1407 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1408                 uint64_t smblctx,
1409                 struct server_id pid,
1410                 br_off start,
1411                 br_off size,
1412                 enum brl_flavour lock_flav,
1413                 struct blocking_lock_record *blr)
1414 {
1415         bool ret;
1416         struct lock_struct lock;
1417
1418         lock.context.smblctx = smblctx;
1419         lock.context.pid = pid;
1420         lock.context.tid = br_lck->fsp->conn->cnum;
1421         lock.start = start;
1422         lock.size = size;
1423         lock.fnum = br_lck->fsp->fnum;
1424         lock.lock_flav = lock_flav;
1425         /* lock.lock_type doesn't matter */
1426
1427         if (lock_flav == WINDOWS_LOCK) {
1428                 ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
1429                     &lock, blr);
1430         } else {
1431                 ret = brl_lock_cancel_default(br_lck, &lock);
1432         }
1433
1434         return ret;
1435 }
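
/*
 * Note (explanatory, not in the original source): Windows-flavour
 * cancels go through the VFS layer so a module (e.g. a cluster
 * backend) can intercept them; the default VFS module ends up in
 * brl_lock_cancel_default() below. A minimal caller sketch, where
 * the blocking_lock_record field names are assumptions:
 *
 *      bool ok = brl_lock_cancel(br_lck,
 *                              blr->smblctx,
 *                              sconn_server_id(fsp->conn->sconn),
 *                              blr->offset, blr->count,
 *                              blr->lock_flav, blr);
 */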
1436
1437 bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
1438                 struct lock_struct *plock)
1439 {
1440         unsigned int i;
1441         struct lock_struct *locks = br_lck->lock_data;
1442
1443         SMB_ASSERT(plock);
1444
1445         for (i = 0; i < br_lck->num_locks; i++) {
1446                 struct lock_struct *lock = &locks[i];
1447
1448                 /* For pending locks we *always* care about the fnum. */
1449                 if (brl_same_context(&lock->context, &plock->context) &&
1450                                 lock->fnum == plock->fnum &&
1451                                 IS_PENDING_LOCK(lock->lock_type) &&
1452                                 lock->lock_flav == plock->lock_flav &&
1453                                 lock->start == plock->start &&
1454                                 lock->size == plock->size) {
1455                         break;
1456                 }
1457         }
1458
1459         if (i == br_lck->num_locks) {
1460                 /* Didn't find it. */
1461                 return False;
1462         }
1463
1464         /* Found this particular pending lock - delete it. */
1465         if (i < br_lck->num_locks - 1) {
1466                 memmove(&locks[i], &locks[i+1], 
1467                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1468         }
1469
1470         br_lck->num_locks -= 1;
1471         br_lck->modified = True;
1472         return True;
1473 }
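
/*
 * The deletion above uses the usual "shift the tail down" idiom for
 * removing element i from a packed array of n entries; as a
 * standalone sketch:
 *
 *      memmove(&arr[i], &arr[i+1], sizeof(arr[0]) * (n - 1 - i));
 *      n--;
 *
 * The memmove is skipped when i is the last index, as nothing
 * follows it.
 */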
1474
1475 /****************************************************************************
1476  Remove any locks associated with an open file.
1477  If this process holds Windows locks through other fnums on the same
1478  dev/ino pair, locks are released one by one so system POSIX locks stay correct.
1479 ****************************************************************************/
1480
1481 void brl_close_fnum(struct messaging_context *msg_ctx,
1482                     struct byte_range_lock *br_lck)
1483 {
1484         files_struct *fsp = br_lck->fsp;
1485         uint16 tid = fsp->conn->cnum;
1486         int fnum = fsp->fnum;
1487         unsigned int i, j, dcount=0;
1488         int num_deleted_windows_locks = 0;
1489         struct lock_struct *locks = br_lck->lock_data;
1490         struct server_id pid = sconn_server_id(fsp->conn->sconn);
1491         bool unlock_individually = False;
1492         bool posix_level2_contention_ended = false;
1493
1494         if (lp_posix_locking(fsp->conn->params)) {
1495
1496                 /* Check for any Windows locks associated with this dev/ino pair
1497                    that are not this fnum. If any exist we must call unlock on each
1498                    one individually, as system POSIX locks are per-process. */
1499
1500                 for (i=0; i < br_lck->num_locks; i++) {
1501                         struct lock_struct *lock = &locks[i];
1502
1503                         if (!procid_equal(&lock->context.pid, &pid)) {
1504                                 continue;
1505                         }
1506
1507                         if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1508                                 continue; /* Ignore pending. */
1509                         }
1510
1511                         if (lock->context.tid != tid || lock->fnum != fnum) {
1512                                 unlock_individually = True;
1513                                 break;
1514                         }
1515                 }
1516
1517                 if (unlock_individually) {
1518                         struct lock_struct *locks_copy;
1519                         unsigned int num_locks_copy;
1520
1521                         /* Copy the current lock array. */
1522                         if (br_lck->num_locks) {
1523                                 locks_copy = (struct lock_struct *)TALLOC_MEMDUP(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1524                                 if (!locks_copy) {
1525                                         smb_panic("brl_close_fnum: talloc failed");
1526                                 }
1527                         } else {        
1528                                 locks_copy = NULL;
1529                         }
1530
1531                         num_locks_copy = br_lck->num_locks;
1532
1533                         for (i=0; i < num_locks_copy; i++) {
1534                                 struct lock_struct *lock = &locks_copy[i];
1535
1536                                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
1537                                                 (lock->fnum == fnum)) {
1538                                         brl_unlock(msg_ctx,
1539                                                 br_lck,
1540                                                 lock->context.smblctx,
1541                                                 pid,
1542                                                 lock->start,
1543                                                 lock->size,
1544                                                 lock->lock_flav);
1545                                 }
1546                         }
1547                         return;
1548                 }
1549         }
1550
1551         /* We can bulk delete - any POSIX locks will be removed when the fd closes. */
1552
1553         /* Remove any existing locks for this fnum (or any fnum if they're POSIX). */
1554
1555         for (i=0; i < br_lck->num_locks; i++) {
1556                 struct lock_struct *lock = &locks[i];
1557                 bool del_this_lock = False;
1558
1559                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) {
1560                         if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
1561                                 del_this_lock = True;
1562                                 num_deleted_windows_locks++;
1563                                 contend_level2_oplocks_end(br_lck->fsp,
1564                                     LEVEL2_CONTEND_WINDOWS_BRL);
1565                         } else if (lock->lock_flav == POSIX_LOCK) {
1566                                 del_this_lock = True;
1567
1568                                 /* Only end level2 contention once for posix */
1569                                 if (!posix_level2_contention_ended) {
1570                                         posix_level2_contention_ended = true;
1571                                         contend_level2_oplocks_end(br_lck->fsp,
1572                                             LEVEL2_CONTEND_POSIX_BRL);
1573                                 }
1574                         }
1575                 }
1576
1577                 if (del_this_lock) {
1578                         /* Send unlock messages to any pending waiters that overlap. */
1579                         for (j=0; j < br_lck->num_locks; j++) {
1580                                 struct lock_struct *pend_lock = &locks[j];
1581
1582                                 /* Ignore our own or non-pending locks. */
1583                                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1584                                         continue;
1585                                 }
1586
1587                                 /* Optimisation - don't send to this fnum as we're
1588                                    closing it. */
1589                                 if (pend_lock->context.tid == tid &&
1590                                     procid_equal(&pend_lock->context.pid, &pid) &&
1591                                     pend_lock->fnum == fnum) {
1592                                         continue;
1593                                 }
1594
1595                                 /* We could send specific lock info here... */
1596                                 if (brl_pending_overlap(lock, pend_lock)) {
1597                                         messaging_send(msg_ctx, pend_lock->context.pid,
1598                                                        MSG_SMB_UNLOCK, &data_blob_null);
1599                                 }
1600                         }
1601
1602                         /* found it - delete it */
1603                         if (br_lck->num_locks > 1 && i < br_lck->num_locks - 1) {
1604                                 memmove(&locks[i], &locks[i+1], 
1605                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1606                         }
1607                         br_lck->num_locks--;
1608                         br_lck->modified = True;
1609                         i--;
1610                         dcount++;
1611                 }
1612         }
1613
1614         if (lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) {
1615                 /* Reduce the Windows lock POSIX reference count on this dev/ino pair. */
1616                 reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks);
1617         }
1618 }
1619
1620 /****************************************************************************
1621  Ensure this set of lock entries is valid.
1622 ****************************************************************************/
1623 static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
1624 {
1625         unsigned int i;
1626         unsigned int num_valid_entries = 0;
1627         struct lock_struct *locks = *pplocks;
1628
1629         for (i = 0; i < *pnum_entries; i++) {
1630                 struct lock_struct *lock_data = &locks[i];
1631                 if (!serverid_exists(&lock_data->context.pid)) {
1632                         /* This process no longer exists - mark this
1633                            entry as invalid by zeroing it. */
1634                         ZERO_STRUCTP(lock_data);
1635                 } else {
1636                         num_valid_entries++;
1637                 }
1638         }
1639
1640         if (num_valid_entries != *pnum_entries) {
1641                 struct lock_struct *new_lock_data = NULL;
1642
1643                 if (num_valid_entries) {
1644                         new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
1645                         if (!new_lock_data) {
1646                                 DEBUG(3, ("malloc fail\n"));
1647                                 return False;
1648                         }
1649
1650                         num_valid_entries = 0;
1651                         for (i = 0; i < *pnum_entries; i++) {
1652                                 struct lock_struct *lock_data = &locks[i];
1653                                 if (lock_data->context.smblctx &&
1654                                                 lock_data->context.tid) {
1655                                         /* Valid (nonzero) entry - copy it. */
1656                                         memcpy(&new_lock_data[num_valid_entries],
1657                                                 lock_data, sizeof(struct lock_struct));
1658                                         num_valid_entries++;
1659                                 }
1660                         }
1661                 }
1662
1663                 SAFE_FREE(*pplocks);
1664                 *pplocks = new_lock_data;
1665                 *pnum_entries = num_valid_entries;
1666         }
1667
1668         return True;
1669 }
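
/*
 * Explanatory note (not in the original source): validation is a
 * mark-and-compact pass. Entries owned by dead server processes are
 * zeroed in place first; if anything was zeroed, the survivors are
 * copied into a freshly allocated array. Zeroed entries are
 * recognised in the copy loop by their cleared smblctx and tid.
 */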
1670
1671 struct brl_forall_cb {
1672         void (*fn)(struct file_id id, struct server_id pid,
1673                    enum brl_type lock_type,
1674                    enum brl_flavour lock_flav,
1675                    br_off start, br_off size,
1676                    void *private_data);
1677         void *private_data;
1678 };
1679
1680 /****************************************************************************
1681  Traverse the whole database with this function, calling traverse_callback
1682  on each lock.
1683 ****************************************************************************/
1684
1685 static int traverse_fn(struct db_record *rec, void *state)
1686 {
1687         struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1688         struct lock_struct *locks;
1689         struct file_id *key;
1690         unsigned int i;
1691         unsigned int num_locks = 0;
1692         unsigned int orig_num_locks = 0;
1693
1694         /* In a traverse function we must make a copy of the
1695            record data before modifying it. */
1696
1697         locks = (struct lock_struct *)memdup(rec->value.dptr,
1698                                              rec->value.dsize);
1699         if (!locks) {
1700                 return -1; /* Terminate traversal. */
1701         }
1702
1703         key = (struct file_id *)rec->key.dptr;
1704         orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks);
1705
1706         /* Ensure the lock db is clean of entries from invalid processes. */
1707
1708         if (!validate_lock_entries(&num_locks, &locks)) {
1709                 SAFE_FREE(locks);
1710                 return -1; /* Terminate traversal */
1711         }
1712
1713         if (orig_num_locks != num_locks) {
1714                 if (num_locks) {
1715                         TDB_DATA data;
1716                         data.dptr = (uint8_t *)locks;
1717                         data.dsize = num_locks*sizeof(struct lock_struct);
1718                         rec->store(rec, data, TDB_REPLACE);
1719                 } else {
1720                         rec->delete_rec(rec);
1721                 }
1722         }
1723
1724         if (cb->fn) {
1725                 for ( i=0; i<num_locks; i++) {
1726                         cb->fn(*key,
1727                                 locks[i].context.pid,
1728                                 locks[i].lock_type,
1729                                 locks[i].lock_flav,
1730                                 locks[i].start,
1731                                 locks[i].size,
1732                                 cb->private_data);
1733                 }
1734         }
1735
1736         SAFE_FREE(locks);
1737         return 0;
1738 }
1739
1740 /*******************************************************************
1741  Call the specified function on each lock in the database.
1742 ********************************************************************/
1743
1744 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1745                           enum brl_type lock_type,
1746                           enum brl_flavour lock_flav,
1747                           br_off start, br_off size,
1748                           void *private_data),
1749                void *private_data)
1750 {
1751         struct brl_forall_cb cb;
1752
1753         if (!brlock_db) {
1754                 return 0;
1755         }
1756         cb.fn = fn;
1757         cb.private_data = private_data;
1758         return brlock_db->traverse(brlock_db, traverse_fn, &cb);
1759 }
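
/*
 * Illustrative sketch (not part of the original source): a
 * brl_forall() callback that counts all byte range locks in
 * brlock.tdb, e.g. for a status utility:
 *
 *      static void count_locks_fn(struct file_id id, struct server_id pid,
 *                                 enum brl_type lock_type,
 *                                 enum brl_flavour lock_flav,
 *                                 br_off start, br_off size,
 *                                 void *private_data)
 *      {
 *              unsigned int *count = (unsigned int *)private_data;
 *              (*count)++;
 *      }
 *
 *      unsigned int count = 0;
 *      brl_forall(count_locks_fn, &count);
 */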
1760
1761 /*******************************************************************
1762  Store a potentially modified set of byte range lock data back into
1763  the database.
1764  Unlock the record.
1765 ********************************************************************/
1766
1767 static void byte_range_lock_flush(struct byte_range_lock *br_lck)
1768 {
1769         if (br_lck->read_only) {
1770                 SMB_ASSERT(!br_lck->modified);
1771         }
1772
1773         if (!br_lck->modified) {
1774                 goto done;
1775         }
1776
1777         if (br_lck->num_locks == 0) {
1778                 /* No locks - delete this entry. */
1779                 NTSTATUS status = br_lck->record->delete_rec(br_lck->record);
1780                 if (!NT_STATUS_IS_OK(status)) {
1781                         DEBUG(0, ("delete_rec returned %s\n",
1782                                   nt_errstr(status)));
1783                         smb_panic("Could not delete byte range lock entry");
1784                 }
1785         } else {
1786                 TDB_DATA data;
1787                 NTSTATUS status;
1788
1789                 data.dptr = (uint8 *)br_lck->lock_data;
1790                 data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
1791
1792                 status = br_lck->record->store(br_lck->record, data,
1793                                                TDB_REPLACE);
1794                 if (!NT_STATUS_IS_OK(status)) {
1795                         DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1796                         smb_panic("Could not store byte range mode entry");
1797                 }
1798         }
1799
1800  done:
1801
1802         br_lck->read_only = true;
1803         br_lck->modified = false;
1804
1805         TALLOC_FREE(br_lck->record);
1806 }
1807
1808 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1809 {
1810         byte_range_lock_flush(br_lck);
1811         SAFE_FREE(br_lck->lock_data);
1812         return 0;
1813 }
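
/*
 * Note (explanatory, not in the original source): because of the
 * destructor above, callers never flush explicitly - freeing the
 * talloc context writes back any modifications and drops the record
 * lock:
 *
 *      struct byte_range_lock *br_lck = brl_get_locks(talloc_tos(), fsp);
 *      ... add or remove locks, setting br_lck->modified on success ...
 *      TALLOC_FREE(br_lck);    flush and unlock happen here
 */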
1814
1815 /*******************************************************************
1816  Fetch a set of byte range lock data from the database.
1817  Leave the record locked.
1818  TALLOC_FREE(brl) will release the lock in the destructor.
1819 ********************************************************************/
1820
1821 static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
1822                                         files_struct *fsp, bool read_only)
1823 {
1824         TDB_DATA key, data;
1825         struct byte_range_lock *br_lck = TALLOC_P(mem_ctx, struct byte_range_lock);
1826         bool do_read_only = read_only;
1827
1828         if (br_lck == NULL) {
1829                 return NULL;
1830         }
1831
1832         br_lck->fsp = fsp;
1833         br_lck->num_locks = 0;
1834         br_lck->modified = False;
1835         br_lck->key = fsp->file_id;
1836
1837         key.dptr = (uint8 *)&br_lck->key;
1838         key.dsize = sizeof(struct file_id);
1839
1840         if (!fsp->lockdb_clean) {
1841                 /* We must be read/write to clean
1842                    the dead entries. */
1843                 do_read_only = false;
1844         }
1845
1846         if (do_read_only) {
1847                 if (brlock_db->fetch(brlock_db, br_lck, key, &data) == -1) {
1848                         DEBUG(3, ("Could not fetch byte range lock record\n"));
1849                         TALLOC_FREE(br_lck);
1850                         return NULL;
1851                 }
1852                 br_lck->record = NULL;
1853         } else {
1854                 br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key);
1855
1856                 if (br_lck->record == NULL) {
1857                         DEBUG(3, ("Could not lock byte range lock entry\n"));
1858                         TALLOC_FREE(br_lck);
1859                         return NULL;
1860                 }
1861
1862                 data = br_lck->record->value;
1863         }
1864
1865         br_lck->read_only = do_read_only;
1866         br_lck->lock_data = NULL;
1867
1868         talloc_set_destructor(br_lck, byte_range_lock_destructor);
1869
1870         br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1871
1872         if (br_lck->num_locks != 0) {
1873                 br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
1874                                                      br_lck->num_locks);
1875                 if (br_lck->lock_data == NULL) {
1876                         DEBUG(0, ("malloc failed\n"));
1877                         TALLOC_FREE(br_lck);
1878                         return NULL;
1879                 }
1880
1881                 memcpy(br_lck->lock_data, data.dptr, data.dsize);
1882         }
1883         
1884         if (!fsp->lockdb_clean) {
1885                 int orig_num_locks = br_lck->num_locks;
1886
1887                 /* This is the first time we've accessed this. */
1888                 /* Go through and ensure all entries exist - remove any that don't. */
1889                 /* Makes the lockdb self cleaning at low cost. */
1890
1891                 if (!validate_lock_entries(&br_lck->num_locks,
1892                                            &br_lck->lock_data)) {
1893                         SAFE_FREE(br_lck->lock_data);
1894                         TALLOC_FREE(br_lck);
1895                         return NULL;
1896                 }
1897
1898                 /* Ensure invalid locks are cleaned up in the destructor. */
1899                 if (orig_num_locks != br_lck->num_locks) {
1900                         br_lck->modified = True;
1901                 }
1902
1903                 /* Mark the lockdb as "clean" as seen from this open file. */
1904                 fsp->lockdb_clean = True;
1905         }
1906
1907         if (DEBUGLEVEL >= 10) {
1908                 unsigned int i;
1909                 struct lock_struct *locks = br_lck->lock_data;
1910                 DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
1911                         br_lck->num_locks,
1912                         file_id_string_tos(&fsp->file_id)));
1913                 for( i = 0; i < br_lck->num_locks; i++) {
1914                         print_lock_struct(i, &locks[i]);
1915                 }
1916         }
1917
1918         if (do_read_only != read_only) {
1919                 /*
1920                  * This stores the record and releases the write lock
1921                  * that was only needed for the cleanup pass.
1922                  */
1923                 byte_range_lock_flush(br_lck);
1924         }
1925
1926         return br_lck;
1927 }
1928
1929 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
1930                                         files_struct *fsp)
1931 {
1932         return brl_get_locks_internal(mem_ctx, fsp, False);
1933 }
1934
1935 struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
1936 {
1937         struct byte_range_lock *br_lock;
1938
1939         if (lp_clustering()) {
1940                 return brl_get_locks_internal(talloc_tos(), fsp, true);
1941         }
1942
1943         if ((fsp->brlock_rec != NULL)
1944             && (brlock_db->get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
1945                 return fsp->brlock_rec;
1946         }
1947
1948         TALLOC_FREE(fsp->brlock_rec);
1949
1950         br_lock = brl_get_locks_internal(talloc_tos(), fsp, true);
1951         if (br_lock == NULL) {
1952                 return NULL;
1953         }
1954         fsp->brlock_seqnum = brlock_db->get_seqnum(brlock_db);
1955
1956         fsp->brlock_rec = talloc_move(fsp, &br_lock);
1957
1958         return fsp->brlock_rec;
1959 }
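
/*
 * Note (explanatory, not in the original source): in the
 * non-clustered case the read-only lock data is cached on the fsp
 * and revalidated by comparing brlock.tdb's sequence number, which
 * changes on every write to the database. Typical use:
 *
 *      struct byte_range_lock *br_lck = brl_get_locks_readonly(fsp);
 *      ... inspect br_lck->num_locks and br_lck->lock_data ...
 *      (no TALLOC_FREE needed here - fsp owns the cached record)
 */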
1960
1961 struct brl_revalidate_state {
1962         ssize_t array_size;
1963         uint32 num_pids;
1964         struct server_id *pids;
1965 };
1966
1967 /*
1968  * Collect PIDs of all processes with pending entries
1969  */
1970
1971 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
1972                                    enum brl_type lock_type,
1973                                    enum brl_flavour lock_flav,
1974                                    br_off start, br_off size,
1975                                    void *private_data)
1976 {
1977         struct brl_revalidate_state *state =
1978                 (struct brl_revalidate_state *)private_data;
1979
1980         if (!IS_PENDING_LOCK(lock_type)) {
1981                 return;
1982         }
1983
1984         add_to_large_array(state, sizeof(pid), (void *)&pid,
1985                            &state->pids, &state->num_pids,
1986                            &state->array_size);
1987 }
1988
1989 /*
1990  * qsort callback to sort the processes
1991  */
1992
1993 static int compare_procids(const void *p1, const void *p2)
1994 {
1995         const struct server_id *i1 = (struct server_id *)p1;
1996         const struct server_id *i2 = (struct server_id *)p2;
1997
1998         if (i1->pid < i2->pid) return -1;
1999         if (i1->pid > i2->pid) return 1;
2000         return 0;
2001 }
2002
2003 /*
2004  * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
2005  * locks so that they retry. Mainly used in the cluster code after a node has
2006  * died.
2007  *
2008  * Done in two steps to avoid double-sends: First we collect all entries in an
2009  * array, then qsort that array and only send to non-dupes.
2010  */
2011
2012 static void brl_revalidate(struct messaging_context *msg_ctx,
2013                            void *private_data,
2014                            uint32_t msg_type,
2015                            struct server_id server_id,
2016                            DATA_BLOB *data)
2017 {
2018         struct brl_revalidate_state *state;
2019         uint32 i;
2020         struct server_id last_pid;
2021
2022         if (!(state = TALLOC_ZERO_P(NULL, struct brl_revalidate_state))) {
2023                 DEBUG(0, ("talloc failed\n"));
2024                 return;
2025         }
2026
2027         brl_forall(brl_revalidate_collect, state);
2028
2029         if (state->array_size == -1) {
2030                 DEBUG(0, ("talloc failed\n"));
2031                 goto done;
2032         }
2033
2034         if (state->num_pids == 0) {
2035                 goto done;
2036         }
2037
2038         TYPESAFE_QSORT(state->pids, state->num_pids, compare_procids);
2039
2040         ZERO_STRUCT(last_pid);
2041
2042         for (i=0; i<state->num_pids; i++) {
2043                 if (procid_equal(&last_pid, &state->pids[i])) {
2044                         /*
2045                          * We've seen that one already
2046                          */
2047                         continue;
2048                 }
2049
2050                 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
2051                                &data_blob_null);
2052                 last_pid = state->pids[i];
2053         }
2054
2055  done:
2056         TALLOC_FREE(state);
2057         return;
2058 }
2059
2060 void brl_register_msgs(struct messaging_context *msg_ctx)
2061 {
2062         messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
2063                            brl_revalidate);
2064 }