edba4ed30a0883f54bb70ca2f2f33ba607869256
[tprouty/samba.git] / source3 / locking / brlock.c
1 /* 
2    Unix SMB/CIFS implementation.
3    byte range locking code
4    Updated to handle range splits/merges.
5
6    Copyright (C) Andrew Tridgell 1992-2000
7    Copyright (C) Jeremy Allison 1992-2000
8    
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13    
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18    
19    You should have received a copy of the GNU General Public License
20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
21 */
22
23 /* This module implements a tdb based byte range locking service,
24    replacing the fcntl() based byte range locking previously
25    used. This allows us to provide the same semantics as NT */
26
27 #include "includes.h"
28
29 #undef DBGC_CLASS
30 #define DBGC_CLASS DBGC_LOCKING
31
/* Compile-time switch: a non-zero value builds the alternative 0/0-lock
   handling that this file wraps in "#if ZERO_ZERO"; with the value 0
   brl_lock() instead logs a debug message when a 0/0 lock is requested. */
32 #define ZERO_ZERO 0
33
34 /* The open brlock.tdb database. */
35
/* Assigned by brl_init() and released by brl_shutdown(). */
36 static struct db_context *brlock_db;
37
38 /****************************************************************************
39  Debug info at level 10 for lock struct.
40 ****************************************************************************/
41
42 static void print_lock_struct(unsigned int i, struct lock_struct *pls)
43 {
44         DEBUG(10,("[%u]: smbpid = %u, tid = %u, pid = %u, ",
45                         i,
46                         (unsigned int)pls->context.smbpid,
47                         (unsigned int)pls->context.tid,
48                         (unsigned int)procid_to_pid(&pls->context.pid) ));
49         
50         DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
51                 (double)pls->start,
52                 (double)pls->size,
53                 pls->fnum,
54                 lock_type_name(pls->lock_type),
55                 lock_flav_name(pls->lock_flav) ));
56 }
57
58 /****************************************************************************
59  See if two locking contexts are equal.
60 ****************************************************************************/
61
62 bool brl_same_context(const struct lock_context *ctx1, 
63                              const struct lock_context *ctx2)
64 {
65         return (procid_equal(&ctx1->pid, &ctx2->pid) &&
66                 (ctx1->smbpid == ctx2->smbpid) &&
67                 (ctx1->tid == ctx2->tid));
68 }
69
70 /****************************************************************************
71  See if lck1 and lck2 overlap.
72 ****************************************************************************/
73
74 static bool brl_overlap(const struct lock_struct *lck1,
75                         const struct lock_struct *lck2)
76 {
77         /* this extra check is not redundent - it copes with locks
78            that go beyond the end of 64 bit file space */
79         if (lck1->size != 0 &&
80             lck1->start == lck2->start &&
81             lck1->size == lck2->size) {
82                 return True;
83         }
84
85         if (lck1->start >= (lck2->start+lck2->size) ||
86             lck2->start >= (lck1->start+lck1->size)) {
87                 return False;
88         }
89         return True;
90 }
91
92 /****************************************************************************
93  See if lock2 can be added when lock1 is in place.
94 ****************************************************************************/
95
96 static bool brl_conflict(const struct lock_struct *lck1, 
97                          const struct lock_struct *lck2)
98 {
99         /* Ignore PENDING locks. */
100         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
101                 return False;
102
103         /* Read locks never conflict. */
104         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
105                 return False;
106         }
107
108         if (brl_same_context(&lck1->context, &lck2->context) &&
109             lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
110                 return False;
111         }
112
113         return brl_overlap(lck1, lck2);
114
115
116 /****************************************************************************
117  See if lock2 can be added when lock1 is in place - when both locks are POSIX
118  flavour. POSIX locks ignore fnum - they only care about dev/ino which we
119  know already match.
120 ****************************************************************************/
121
122 static bool brl_conflict_posix(const struct lock_struct *lck1, 
123                                 const struct lock_struct *lck2)
124 {
125 #if defined(DEVELOPER)
126         SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
127         SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
128 #endif
129
130         /* Ignore PENDING locks. */
131         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
132                 return False;
133
134         /* Read locks never conflict. */
135         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
136                 return False;
137         }
138
139         /* Locks on the same context con't conflict. Ignore fnum. */
140         if (brl_same_context(&lck1->context, &lck2->context)) {
141                 return False;
142         }
143
144         /* One is read, the other write, or the context is different,
145            do they overlap ? */
146         return brl_overlap(lck1, lck2);
147
148
#if ZERO_ZERO
/****************************************************************************
 Variant of brl_conflict() that is only built when ZERO_ZERO is non-zero.
 It differs by treating a requested 0/0 lock (start 0, size 0) as
 conflicting with any existing lock of non-zero size.
 Returns True when lck2 conflicts with lck1.
****************************************************************************/

static bool brl_conflict1(const struct lock_struct *lck1,
			 const struct lock_struct *lck2)
{
	/* Ignore PENDING locks. */
	if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type)) {
		return False;
	}

	/* Read locks never conflict. */
	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
		return False;
	}

	/* A read lock requested by the same context on the same fnum
	   never conflicts. */
	if (brl_same_context(&lck1->context, &lck2->context) &&
	    lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
		return False;
	}

	/* The 0/0 request conflicts with every non-empty existing lock. */
	if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
		return True;
	}

	/* Disjoint ranges do not conflict. */
	if (lck1->start >= (lck2->start + lck2->size) ||
	    lck2->start >= (lck1->start + lck1->size)) {
		return False;
	}

	return True;
}
#endif
177
178 /****************************************************************************
179  Check to see if this lock conflicts, but ignore our own locks on the
180  same fnum only. This is the read/write lock check code path.
181  This is never used in the POSIX lock case.
182 ****************************************************************************/
183
184 static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
185 {
186         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
187                 return False;
188
189         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
190                 return False;
191
192         /* POSIX flavour locks never conflict here - this is only called
193            in the read/write path. */
194
195         if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
196                 return False;
197
198         /*
199          * Incoming WRITE locks conflict with existing READ locks even
200          * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
201          */
202
203         if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
204                 if (brl_same_context(&lck1->context, &lck2->context) &&
205                                         lck1->fnum == lck2->fnum)
206                         return False;
207         }
208
209         return brl_overlap(lck1, lck2);
210
211
212 /****************************************************************************
213  Check if an unlock overlaps a pending lock.
214 ****************************************************************************/
215
216 static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
217 {
218         if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
219                 return True;
220         if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
221                 return True;
222         return False;
223 }
224
225 /****************************************************************************
226  Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
227  is the same as this one and changes its error code. I wonder if any
228  app depends on this ?
229 ****************************************************************************/
230
231 static NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock)
232 {
233         if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
234                 /* amazing the little things you learn with a test
235                    suite. Locks beyond this offset (as a 64 bit
236                    number!) always generate the conflict error code,
237                    unless the top bit is set */
238                 if (!blocking_lock) {
239                         fsp->last_lock_failure = *lock;
240                 }
241                 return NT_STATUS_FILE_LOCK_CONFLICT;
242         }
243
244         if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
245                         lock->context.tid == fsp->last_lock_failure.context.tid &&
246                         lock->fnum == fsp->last_lock_failure.fnum &&
247                         lock->start == fsp->last_lock_failure.start) {
248                 return NT_STATUS_FILE_LOCK_CONFLICT;
249         }
250
251         if (!blocking_lock) {
252                 fsp->last_lock_failure = *lock;
253         }
254         return NT_STATUS_LOCK_NOT_GRANTED;
255 }
256
257 /****************************************************************************
258  Open up the brlock.tdb database.
259 ****************************************************************************/
260
261 void brl_init(bool read_only)
262 {
263         if (brlock_db) {
264                 return;
265         }
266         brlock_db = db_open(NULL, lock_path("brlock.tdb"),
267                             lp_open_files_db_hash_size(),
268                             TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST,
269                             read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
270         if (!brlock_db) {
271                 DEBUG(0,("Failed to open byte range locking database %s\n",
272                         lock_path("brlock.tdb")));
273                 return;
274         }
275 }
276
277 /****************************************************************************
278  Close down the brlock.tdb database.
279 ****************************************************************************/
280
281 void brl_shutdown(void)
282 {
283         TALLOC_FREE(brlock_db);
284 }
285
#if ZERO_ZERO
/****************************************************************************
 Compare two locks for sorting: order by start offset, then by size.
 Returns a negative value, zero or a positive value when lck1 sorts
 before, equal to or after lck2.
****************************************************************************/

static int lock_compare(const struct lock_struct *lck1,
			 const struct lock_struct *lck2)
{
	/* Compare explicitly instead of returning a difference: squeezing
	   a difference of wide offsets into an int can flip its sign or
	   yield 0 for values that are not equal. */
	if (lck1->start != lck2->start) {
		return (lck1->start < lck2->start) ? -1 : 1;
	}
	if (lck1->size != lck2->size) {
		return (lck1->size < lck2->size) ? -1 : 1;
	}
	return 0;
}
#endif
303
304 /****************************************************************************
305  Lock a range of bytes - Windows lock semantics.
306 ****************************************************************************/
307
308 static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck,
309                         struct lock_struct *plock, bool blocking_lock)
310 {
311         unsigned int i;
312         files_struct *fsp = br_lck->fsp;
313         struct lock_struct *locks = br_lck->lock_data;
314         NTSTATUS status;
315
316         for (i=0; i < br_lck->num_locks; i++) {
317                 /* Do any Windows or POSIX locks conflict ? */
318                 if (brl_conflict(&locks[i], plock)) {
319                         /* Remember who blocked us. */
320                         plock->context.smbpid = locks[i].context.smbpid;
321                         return brl_lock_failed(fsp,plock,blocking_lock);
322                 }
323 #if ZERO_ZERO
324                 if (plock->start == 0 && plock->size == 0 && 
325                                 locks[i].size == 0) {
326                         break;
327                 }
328 #endif
329         }
330
331         if (!IS_PENDING_LOCK(plock->lock_type)) {
332                 contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
333         }
334
335         /* We can get the Windows lock, now see if it needs to
336            be mapped into a lower level POSIX one, and if so can
337            we get it ? */
338
339         if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
340                 int errno_ret;
341                 if (!set_posix_lock_windows_flavour(fsp,
342                                 plock->start,
343                                 plock->size,
344                                 plock->lock_type,
345                                 &plock->context,
346                                 locks,
347                                 br_lck->num_locks,
348                                 &errno_ret)) {
349
350                         /* We don't know who blocked us. */
351                         plock->context.smbpid = 0xFFFFFFFF;
352
353                         if (errno_ret == EACCES || errno_ret == EAGAIN) {
354                                 status = NT_STATUS_FILE_LOCK_CONFLICT;
355                                 goto fail;
356                         } else {
357                                 status = map_nt_error_from_unix(errno);
358                                 goto fail;
359                         }
360                 }
361         }
362
363         /* no conflicts - add it to the list of locks */
364         locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
365         if (!locks) {
366                 status = NT_STATUS_NO_MEMORY;
367                 goto fail;
368         }
369
370         memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
371         br_lck->num_locks += 1;
372         br_lck->lock_data = locks;
373         br_lck->modified = True;
374
375         return NT_STATUS_OK;
376  fail:
377         if (!IS_PENDING_LOCK(plock->lock_type)) {
378                 contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
379         }
380         return status;
381 }
382
383 /****************************************************************************
384  Cope with POSIX range splits and merges.
385 ****************************************************************************/
386
387 static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,               /* Output array. */
388                                                 const struct lock_struct *ex,           /* existing lock. */
389                                                 const struct lock_struct *plock,        /* proposed lock. */
390                                                 bool *lock_was_added)
391 {
392         bool lock_types_differ = (ex->lock_type != plock->lock_type);
393
394         /* We can't merge non-conflicting locks on different context - ignore fnum. */
395
396         if (!brl_same_context(&ex->context, &plock->context)) {
397                 /* Just copy. */
398                 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
399                 return 1;
400         }
401
402         /* We now know we have the same context. */
403
404         /* Did we overlap ? */
405
406 /*********************************************
407                                              +---------+
408                                              | ex      |
409                                              +---------+
410                               +-------+
411                               | plock |
412                               +-------+
413 OR....
414              +---------+
415              |  ex     |
416              +---------+
417 **********************************************/
418
419         if ( (ex->start > (plock->start + plock->size)) ||
420                         (plock->start > (ex->start + ex->size))) {
421                 /* No overlap with this lock - copy existing. */
422                 memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
423                 return 1;
424         }
425
426 /*********************************************
427         +---------------------------+
428         |          ex               |
429         +---------------------------+
430         +---------------------------+
431         |       plock               | -> replace with plock.
432         +---------------------------+
433 **********************************************/
434
435         if ( (ex->start >= plock->start) &&
436                         (ex->start + ex->size <= plock->start + plock->size) ) {
437                 memcpy(&lck_arr[0], plock, sizeof(struct lock_struct));
438                 *lock_was_added = True;
439                 return 1;
440         }
441
442 /*********************************************
443         +-----------------------+
444         |          ex           |
445         +-----------------------+
446         +---------------+
447         |   plock       |
448         +---------------+
449 OR....
450                         +-------+
451                         |  ex   |
452                         +-------+
453         +---------------+
454         |   plock       |
455         +---------------+
456
457 BECOMES....
458         +---------------+-------+
459         |   plock       | ex    | - different lock types.
460         +---------------+-------+
461 OR.... (merge)
462         +-----------------------+
463         |   ex                  | - same lock type.
464         +-----------------------+
465 **********************************************/
466
467         if ( (ex->start >= plock->start) &&
468                                 (ex->start <= plock->start + plock->size) &&
469                                 (ex->start + ex->size > plock->start + plock->size) ) {
470
471                 *lock_was_added = True;
472
473                 /* If the lock types are the same, we merge, if different, we
474                    add the new lock before the old. */
475
476                 if (lock_types_differ) {
477                         /* Add new. */
478                         memcpy(&lck_arr[0], plock, sizeof(struct lock_struct));
479                         memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));
480                         /* Adjust existing start and size. */
481                         lck_arr[1].start = plock->start + plock->size;
482                         lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
483                         return 2;
484                 } else {
485                         /* Merge. */
486                         memcpy(&lck_arr[0], plock, sizeof(struct lock_struct));
487                         /* Set new start and size. */
488                         lck_arr[0].start = plock->start;
489                         lck_arr[0].size = (ex->start + ex->size) - plock->start;
490                         return 1;
491                 }
492         }
493
494 /*********************************************
495    +-----------------------+
496    |  ex                   |
497    +-----------------------+
498            +---------------+
499            |   plock       |
500            +---------------+
501 OR....
502    +-------+        
503    |  ex   |
504    +-------+
505            +---------------+
506            |   plock       |
507            +---------------+
508 BECOMES....
509    +-------+---------------+
510    | ex    |   plock       | - different lock types
511    +-------+---------------+
512
513 OR.... (merge)
514    +-----------------------+
515    | ex                    | - same lock type.
516    +-----------------------+
517
518 **********************************************/
519
520         if ( (ex->start < plock->start) &&
521                         (ex->start + ex->size >= plock->start) &&
522                         (ex->start + ex->size <= plock->start + plock->size) ) {
523
524                 *lock_was_added = True;
525
526                 /* If the lock types are the same, we merge, if different, we
527                    add the new lock after the old. */
528
529                 if (lock_types_differ) {
530                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
531                         memcpy(&lck_arr[1], plock, sizeof(struct lock_struct));
532                         /* Adjust existing size. */
533                         lck_arr[0].size = plock->start - ex->start;
534                         return 2;
535                 } else {
536                         /* Merge. */
537                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
538                         /* Adjust existing size. */
539                         lck_arr[0].size = (plock->start + plock->size) - ex->start;
540                         return 1;
541                 }
542         }
543
544 /*********************************************
545         +---------------------------+
546         |        ex                 |
547         +---------------------------+
548                 +---------+
549                 |  plock  |
550                 +---------+
551 BECOMES.....
552         +-------+---------+---------+
553         | ex    |  plock  | ex      | - different lock types.
554         +-------+---------+---------+
555 OR
556         +---------------------------+
557         |        ex                 | - same lock type.
558         +---------------------------+
559 **********************************************/
560
561         if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {
562                 *lock_was_added = True;
563
564                 if (lock_types_differ) {
565
566                         /* We have to split ex into two locks here. */
567
568                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
569                         memcpy(&lck_arr[1], plock, sizeof(struct lock_struct));
570                         memcpy(&lck_arr[2], ex, sizeof(struct lock_struct));
571
572                         /* Adjust first existing size. */
573                         lck_arr[0].size = plock->start - ex->start;
574
575                         /* Adjust second existing start and size. */
576                         lck_arr[2].start = plock->start + plock->size;
577                         lck_arr[2].size = (ex->start + ex->size) - (plock->start + plock->size);
578                         return 3;
579                 } else {
580                         /* Just eat plock. */
581                         memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
582                         return 1;
583                 }
584         }
585
586         /* Never get here. */
587         smb_panic("brlock_posix_split_merge");
588         /* Notreached. */
589
590         /* Keep some compilers happy. */
591         return 0;
592 }
593
594 /****************************************************************************
595  Lock a range of bytes - POSIX lock semantics.
596  We must cope with range splits and merges.
597 ****************************************************************************/
598
599 static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
600                                struct byte_range_lock *br_lck,
601                                struct lock_struct *plock)
602 {
603         unsigned int i, count, posix_count;
604         struct lock_struct *locks = br_lck->lock_data;
605         struct lock_struct *tp;
606         bool lock_was_added = False;
607         bool signal_pending_read = False;
608         bool break_oplocks = false;
609         NTSTATUS status;
610
611         /* No zero-zero locks for POSIX. */
612         if (plock->start == 0 && plock->size == 0) {
613                 return NT_STATUS_INVALID_PARAMETER;
614         }
615
616         /* Don't allow 64-bit lock wrap. */
617         if (plock->start + plock->size < plock->start ||
618                         plock->start + plock->size < plock->size) {
619                 return NT_STATUS_INVALID_PARAMETER;
620         }
621
622         /* The worst case scenario here is we have to split an
623            existing POSIX lock range into two, and add our lock,
624            so we need at most 2 more entries. */
625
626         tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
627         if (!tp) {
628                 return NT_STATUS_NO_MEMORY;
629         }
630         
631         count = posix_count = 0;
632         for (i=0; i < br_lck->num_locks; i++) {
633                 struct lock_struct *curr_lock = &locks[i];
634
635                 /* If we have a pending read lock, a lock downgrade should
636                    trigger a lock re-evaluation. */
637                 if (curr_lock->lock_type == PENDING_READ_LOCK &&
638                                 brl_pending_overlap(plock, curr_lock)) {
639                         signal_pending_read = True;
640                 }
641
642                 if (curr_lock->lock_flav == WINDOWS_LOCK) {
643                         /* Do any Windows flavour locks conflict ? */
644                         if (brl_conflict(curr_lock, plock)) {
645                                 /* No games with error messages. */
646                                 SAFE_FREE(tp);
647                                 /* Remember who blocked us. */
648                                 plock->context.smbpid = curr_lock->context.smbpid;
649                                 return NT_STATUS_FILE_LOCK_CONFLICT;
650                         }
651                         /* Just copy the Windows lock into the new array. */
652                         memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
653                         count++;
654                 } else {
655                         unsigned int tmp_count = 0;
656
657                         /* POSIX conflict semantics are different. */
658                         if (brl_conflict_posix(curr_lock, plock)) {
659                                 /* Can't block ourselves with POSIX locks. */
660                                 /* No games with error messages. */
661                                 SAFE_FREE(tp);
662                                 /* Remember who blocked us. */
663                                 plock->context.smbpid = curr_lock->context.smbpid;
664                                 return NT_STATUS_FILE_LOCK_CONFLICT;
665                         }
666
667                         /* Work out overlaps. */
668                         tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock, &lock_was_added);
669                         posix_count += tmp_count;
670                         count += tmp_count;
671                 }
672         }
673
674         /*
675          * Break oplocks while we hold a brl. Since lock() and unlock() calls
676          * are not symetric with POSIX semantics, we cannot guarantee our
677          * contend_level2_oplocks_begin/end calls will be acquired and
678          * released one-for-one as with Windows semantics. Therefore we only
679          * call contend_level2_oplocks_begin if this is the first POSIX brl on
680          * the file.
681          */
682         break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
683                          posix_count == 0);
684         if (break_oplocks) {
685                 contend_level2_oplocks_begin(br_lck->fsp,
686                                              LEVEL2_CONTEND_POSIX_BRL);
687         }
688
689         if (!lock_was_added) {
690                 memcpy(&tp[count], plock, sizeof(struct lock_struct));
691                 count++;
692         }
693
694         /* We can get the POSIX lock, now see if it needs to
695            be mapped into a lower level POSIX one, and if so can
696            we get it ? */
697
698         if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
699                 int errno_ret;
700
701                 /* The lower layer just needs to attempt to
702                    get the system POSIX lock. We've weeded out
703                    any conflicts above. */
704
705                 if (!set_posix_lock_posix_flavour(br_lck->fsp,
706                                 plock->start,
707                                 plock->size,
708                                 plock->lock_type,
709                                 &errno_ret)) {
710
711                         /* We don't know who blocked us. */
712                         plock->context.smbpid = 0xFFFFFFFF;
713
714                         if (errno_ret == EACCES || errno_ret == EAGAIN) {
715                                 SAFE_FREE(tp);
716                                 status = NT_STATUS_FILE_LOCK_CONFLICT;
717                                 goto fail;
718                         } else {
719                                 SAFE_FREE(tp);
720                                 status = map_nt_error_from_unix(errno);
721                                 goto fail;
722                         }
723                 }
724         }
725
726         /* Realloc so we don't leak entries per lock call. */
727         tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
728         if (!tp) {
729                 status = NT_STATUS_NO_MEMORY;
730                 goto fail;
731         }
732         br_lck->num_locks = count;
733         SAFE_FREE(br_lck->lock_data);
734         br_lck->lock_data = tp;
735         locks = tp;
736         br_lck->modified = True;
737
738         /* A successful downgrade from write to read lock can trigger a lock
739            re-evalutation where waiting readers can now proceed. */
740
741         if (signal_pending_read) {
742                 /* Send unlock messages to any pending read waiters that overlap. */
743                 for (i=0; i < br_lck->num_locks; i++) {
744                         struct lock_struct *pend_lock = &locks[i];
745
746                         /* Ignore non-pending locks. */
747                         if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
748                                 continue;
749                         }
750
751                         if (pend_lock->lock_type == PENDING_READ_LOCK &&
752                                         brl_pending_overlap(plock, pend_lock)) {
753                                 DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
754                                         procid_str_static(&pend_lock->context.pid )));
755
756                                 messaging_send(msg_ctx, pend_lock->context.pid,
757                                                MSG_SMB_UNLOCK, &data_blob_null);
758                         }
759                 }
760         }
761
762         return NT_STATUS_OK;
763  fail:
764         if (break_oplocks) {
765                 contend_level2_oplocks_end(br_lck->fsp,
766                                            LEVEL2_CONTEND_POSIX_BRL);
767         }
768         return status;
769 }
770
771 /****************************************************************************
772  Lock a range of bytes.
773 ****************************************************************************/
774
775 NTSTATUS brl_lock(struct messaging_context *msg_ctx,
776                 struct byte_range_lock *br_lck,
777                 uint32 smbpid,
778                 struct server_id pid,
779                 br_off start,
780                 br_off size, 
781                 enum brl_type lock_type,
782                 enum brl_flavour lock_flav,
783                 bool blocking_lock,
784                 uint32 *psmbpid)
785 {
786         NTSTATUS ret;
787         struct lock_struct lock;
788
789 #if !ZERO_ZERO
790         if (start == 0 && size == 0) {
791                 DEBUG(0,("client sent 0/0 lock - please report this\n"));
792         }
793 #endif
794
795 #ifdef DEVELOPER
796         /* Quieten valgrind on test. */
797         memset(&lock, '\0', sizeof(lock));
798 #endif
799
800         lock.context.smbpid = smbpid;
801         lock.context.pid = pid;
802         lock.context.tid = br_lck->fsp->conn->cnum;
803         lock.start = start;
804         lock.size = size;
805         lock.fnum = br_lck->fsp->fnum;
806         lock.lock_type = lock_type;
807         lock.lock_flav = lock_flav;
808
809         if (lock_flav == WINDOWS_LOCK) {
810                 ret = brl_lock_windows(br_lck, &lock, blocking_lock);
811         } else {
812                 ret = brl_lock_posix(msg_ctx, br_lck, &lock);
813         }
814
815 #if ZERO_ZERO
816         /* sort the lock list */
817         qsort(br_lck->lock_data, (size_t)br_lck->num_locks, sizeof(lock), lock_compare);
818 #endif
819
820         /* If we're returning an error, return who blocked us. */
821         if (!NT_STATUS_IS_OK(ret) && psmbpid) {
822                 *psmbpid = lock.context.smbpid;
823         }
824         return ret;
825 }
826
827 /****************************************************************************
828  Unlock a range of bytes - Windows semantics.
829 ****************************************************************************/
830
831 static bool brl_unlock_windows(struct messaging_context *msg_ctx,
832                                struct byte_range_lock *br_lck,
833                                const struct lock_struct *plock)
834 {
835         unsigned int i, j;
836         struct lock_struct *locks = br_lck->lock_data;
837         enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */
838
839 #if ZERO_ZERO
840         /* Delete write locks by preference... The lock list
841            is sorted in the zero zero case. */
842
843         for (i = 0; i < br_lck->num_locks; i++) {
844                 struct lock_struct *lock = &locks[i];
845
846                 if (lock->lock_type == WRITE_LOCK &&
847                     brl_same_context(&lock->context, &plock->context) &&
848                     lock->fnum == plock->fnum &&
849                     lock->lock_flav == WINDOWS_LOCK &&
850                     lock->start == plock->start &&
851                     lock->size == plock->size) {
852
853                         /* found it - delete it */
854                         deleted_lock_type = lock->lock_type;
855                         break;
856                 }
857         }
858
859         if (i != br_lck->num_locks) {
860                 /* We found it - don't search again. */
861                 goto unlock_continue;
862         }
863 #endif
864
865         for (i = 0; i < br_lck->num_locks; i++) {
866                 struct lock_struct *lock = &locks[i];
867
868                 /* Only remove our own locks that match in start, size, and flavour. */
869                 if (brl_same_context(&lock->context, &plock->context) &&
870                                         lock->fnum == plock->fnum &&
871                                         lock->lock_flav == WINDOWS_LOCK &&
872                                         lock->start == plock->start &&
873                                         lock->size == plock->size ) {
874                         deleted_lock_type = lock->lock_type;
875                         break;
876                 }
877         }
878
879         if (i == br_lck->num_locks) {
880                 /* we didn't find it */
881                 return False;
882         }
883
884 #if ZERO_ZERO
885   unlock_continue:
886 #endif
887
888         /* Actually delete the lock. */
889         if (i < br_lck->num_locks - 1) {
890                 memmove(&locks[i], &locks[i+1], 
891                         sizeof(*locks)*((br_lck->num_locks-1) - i));
892         }
893
894         br_lck->num_locks -= 1;
895         br_lck->modified = True;
896
897         /* Unlock the underlying POSIX regions. */
898         if(lp_posix_locking(br_lck->fsp->conn->params)) {
899                 release_posix_lock_windows_flavour(br_lck->fsp,
900                                 plock->start,
901                                 plock->size,
902                                 deleted_lock_type,
903                                 &plock->context,
904                                 locks,
905                                 br_lck->num_locks);
906         }
907
908         /* Send unlock messages to any pending waiters that overlap. */
909         for (j=0; j < br_lck->num_locks; j++) {
910                 struct lock_struct *pend_lock = &locks[j];
911
912                 /* Ignore non-pending locks. */
913                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
914                         continue;
915                 }
916
917                 /* We could send specific lock info here... */
918                 if (brl_pending_overlap(plock, pend_lock)) {
919                         DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
920                                 procid_str_static(&pend_lock->context.pid )));
921
922                         messaging_send(msg_ctx, pend_lock->context.pid,
923                                        MSG_SMB_UNLOCK, &data_blob_null);
924                 }
925         }
926
927         contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
928         return True;
929 }
930
931 /****************************************************************************
932  Unlock a range of bytes - POSIX semantics.
933 ****************************************************************************/
934
935 static bool brl_unlock_posix(struct messaging_context *msg_ctx,
936                              struct byte_range_lock *br_lck,
937                              const struct lock_struct *plock)
938 {
939         unsigned int i, j, count, posix_count;
940         struct lock_struct *tp;
941         struct lock_struct *locks = br_lck->lock_data;
942         bool overlap_found = False;
943
944         /* No zero-zero locks for POSIX. */
945         if (plock->start == 0 && plock->size == 0) {
946                 return False;
947         }
948
949         /* Don't allow 64-bit lock wrap. */
950         if (plock->start + plock->size < plock->start ||
951                         plock->start + plock->size < plock->size) {
952                 DEBUG(10,("brl_unlock_posix: lock wrap\n"));
953                 return False;
954         }
955
956         /* The worst case scenario here is we have to split an
957            existing POSIX lock range into two, so we need at most
958            1 more entry. */
959
960         tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
961         if (!tp) {
962                 DEBUG(10,("brl_unlock_posix: malloc fail\n"));
963                 return False;
964         }
965
966         count = posix_count = 0;
967         for (i = 0; i < br_lck->num_locks; i++) {
968                 struct lock_struct *lock = &locks[i];
969                 struct lock_struct tmp_lock[3];
970                 bool lock_was_added = False;
971                 unsigned int tmp_count;
972
973                 /* Only remove our own locks - ignore fnum. */
974                 if (IS_PENDING_LOCK(lock->lock_type) ||
975                                 !brl_same_context(&lock->context, &plock->context)) {
976                         memcpy(&tp[count], lock, sizeof(struct lock_struct));
977                         count++;
978                         continue;
979                 }
980
981                 /* Work out overlaps. */
982                 tmp_count = brlock_posix_split_merge(&tmp_lock[0], &locks[i], plock, &lock_was_added);
983
984                 if (tmp_count == 1) {
985                         /* Ether the locks didn't overlap, or the unlock completely
986                            overlapped this lock. If it didn't overlap, then there's
987                            no change in the locks. */
988                         if (tmp_lock[0].lock_type != UNLOCK_LOCK) {
989                                 SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type);
990                                 /* No change in this lock. */
991                                 memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
992                                 count++;
993                                 posix_count++;
994                         } else {
995                                 SMB_ASSERT(tmp_lock[0].lock_type == UNLOCK_LOCK);
996                                 overlap_found = True;
997                         }
998                         continue;
999                 } else if (tmp_count == 2) {
1000                         /* The unlock overlapped an existing lock. Copy the truncated
1001                            lock into the lock array. */
1002                         if (tmp_lock[0].lock_type != UNLOCK_LOCK) {
1003                                 SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type);
1004                                 SMB_ASSERT(tmp_lock[1].lock_type == UNLOCK_LOCK);
1005                                 memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
1006                                 if (tmp_lock[0].size != locks[i].size) {
1007                                         overlap_found = True;
1008                                 }
1009                         } else {
1010                                 SMB_ASSERT(tmp_lock[0].lock_type == UNLOCK_LOCK);
1011                                 SMB_ASSERT(tmp_lock[1].lock_type == locks[i].lock_type);
1012                                 memcpy(&tp[count], &tmp_lock[1], sizeof(struct lock_struct));
1013                                 if (tmp_lock[1].start != locks[i].start) {
1014                                         overlap_found = True;
1015                                 }
1016                         }
1017                         count++;
1018                         posix_count++;
1019                         continue;
1020                 } else {
1021                         /* tmp_count == 3 - (we split a lock range in two). */
1022                         SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type);
1023                         SMB_ASSERT(tmp_lock[1].lock_type == UNLOCK_LOCK);
1024                         SMB_ASSERT(tmp_lock[2].lock_type == locks[i].lock_type);
1025
1026                         memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
1027                         count++;
1028                         posix_count++;
1029                         memcpy(&tp[count], &tmp_lock[2], sizeof(struct lock_struct));
1030                         count++;
1031                         posix_count++;
1032                         overlap_found = True;
1033                         /* Optimisation... */
1034                         /* We know we're finished here as we can't overlap any
1035                            more POSIX locks. Copy the rest of the lock array. */
1036                         if (i < br_lck->num_locks - 1) {
1037                                 memcpy(&tp[count], &locks[i+1], 
1038                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1039                                 count += ((br_lck->num_locks-1) - i);
1040                         }
1041                         break;
1042                 }
1043         }
1044
1045         if (!overlap_found) {
1046                 /* Just ignore - no change. */
1047                 SAFE_FREE(tp);
1048                 DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
1049                 return True;
1050         }
1051
1052         /* Unlock any POSIX regions. */
1053         if(lp_posix_locking(br_lck->fsp->conn->params)) {
1054                 release_posix_lock_posix_flavour(br_lck->fsp,
1055                                                 plock->start,
1056                                                 plock->size,
1057                                                 &plock->context,
1058                                                 tp,
1059                                                 count);
1060         }
1061
1062         /* Realloc so we don't leak entries per unlock call. */
1063         if (count) {
1064                 tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
1065                 if (!tp) {
1066                         DEBUG(10,("brl_unlock_posix: realloc fail\n"));
1067                         return False;
1068                 }
1069         } else {
1070                 /* We deleted the last lock. */
1071                 SAFE_FREE(tp);
1072                 tp = NULL;
1073         }
1074
1075         if (posix_count == 0) {
1076                 contend_level2_oplocks_end(br_lck->fsp,
1077                                            LEVEL2_CONTEND_POSIX_BRL);
1078         }
1079
1080         br_lck->num_locks = count;
1081         SAFE_FREE(br_lck->lock_data);
1082         locks = tp;
1083         br_lck->lock_data = tp;
1084         br_lck->modified = True;
1085
1086         /* Send unlock messages to any pending waiters that overlap. */
1087
1088         for (j=0; j < br_lck->num_locks; j++) {
1089                 struct lock_struct *pend_lock = &locks[j];
1090
1091                 /* Ignore non-pending locks. */
1092                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1093                         continue;
1094                 }
1095
1096                 /* We could send specific lock info here... */
1097                 if (brl_pending_overlap(plock, pend_lock)) {
1098                         DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
1099                                 procid_str_static(&pend_lock->context.pid )));
1100
1101                         messaging_send(msg_ctx, pend_lock->context.pid,
1102                                        MSG_SMB_UNLOCK, &data_blob_null);
1103                 }
1104         }
1105
1106         return True;
1107 }
1108
1109 /****************************************************************************
1110  Unlock a range of bytes.
1111 ****************************************************************************/
1112
1113 bool brl_unlock(struct messaging_context *msg_ctx,
1114                 struct byte_range_lock *br_lck,
1115                 uint32 smbpid,
1116                 struct server_id pid,
1117                 br_off start,
1118                 br_off size,
1119                 enum brl_flavour lock_flav)
1120 {
1121         struct lock_struct lock;
1122
1123         lock.context.smbpid = smbpid;
1124         lock.context.pid = pid;
1125         lock.context.tid = br_lck->fsp->conn->cnum;
1126         lock.start = start;
1127         lock.size = size;
1128         lock.fnum = br_lck->fsp->fnum;
1129         lock.lock_type = UNLOCK_LOCK;
1130         lock.lock_flav = lock_flav;
1131
1132         if (lock_flav == WINDOWS_LOCK) {
1133                 return brl_unlock_windows(msg_ctx, br_lck, &lock);
1134         } else {
1135                 return brl_unlock_posix(msg_ctx, br_lck, &lock);
1136         }
1137 }
1138
1139 /****************************************************************************
1140  Test if we could add a lock if we wanted to.
1141  Returns True if the region required is currently unlocked, False if locked.
1142 ****************************************************************************/
1143
1144 bool brl_locktest(struct byte_range_lock *br_lck,
1145                 uint32 smbpid,
1146                 struct server_id pid,
1147                 br_off start,
1148                 br_off size, 
1149                 enum brl_type lock_type,
1150                 enum brl_flavour lock_flav)
1151 {
1152         bool ret = True;
1153         unsigned int i;
1154         struct lock_struct lock;
1155         const struct lock_struct *locks = br_lck->lock_data;
1156         files_struct *fsp = br_lck->fsp;
1157
1158         lock.context.smbpid = smbpid;
1159         lock.context.pid = pid;
1160         lock.context.tid = br_lck->fsp->conn->cnum;
1161         lock.start = start;
1162         lock.size = size;
1163         lock.fnum = fsp->fnum;
1164         lock.lock_type = lock_type;
1165         lock.lock_flav = lock_flav;
1166
1167         /* Make sure existing locks don't conflict */
1168         for (i=0; i < br_lck->num_locks; i++) {
1169                 /*
1170                  * Our own locks don't conflict.
1171                  */
1172                 if (brl_conflict_other(&locks[i], &lock)) {
1173                         return False;
1174                 }
1175         }
1176
1177         /*
1178          * There is no lock held by an SMB daemon, check to
1179          * see if there is a POSIX lock from a UNIX or NFS process.
1180          * This only conflicts with Windows locks, not POSIX locks.
1181          */
1182
1183         if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
1184                 ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
1185
1186                 DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1187                         (double)start, (double)size, ret ? "locked" : "unlocked",
1188                         fsp->fnum, fsp->fsp_name ));
1189
1190                 /* We need to return the inverse of is_posix_locked. */
1191                 ret = !ret;
1192         }
1193
1194         /* no conflicts - we could have added it */
1195         return ret;
1196 }
1197
1198 /****************************************************************************
1199  Query for existing locks.
1200 ****************************************************************************/
1201
1202 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1203                 uint32 *psmbpid,
1204                 struct server_id pid,
1205                 br_off *pstart,
1206                 br_off *psize, 
1207                 enum brl_type *plock_type,
1208                 enum brl_flavour lock_flav)
1209 {
1210         unsigned int i;
1211         struct lock_struct lock;
1212         const struct lock_struct *locks = br_lck->lock_data;
1213         files_struct *fsp = br_lck->fsp;
1214
1215         lock.context.smbpid = *psmbpid;
1216         lock.context.pid = pid;
1217         lock.context.tid = br_lck->fsp->conn->cnum;
1218         lock.start = *pstart;
1219         lock.size = *psize;
1220         lock.fnum = fsp->fnum;
1221         lock.lock_type = *plock_type;
1222         lock.lock_flav = lock_flav;
1223
1224         /* Make sure existing locks don't conflict */
1225         for (i=0; i < br_lck->num_locks; i++) {
1226                 const struct lock_struct *exlock = &locks[i];
1227                 bool conflict = False;
1228
1229                 if (exlock->lock_flav == WINDOWS_LOCK) {
1230                         conflict = brl_conflict(exlock, &lock);
1231                 } else {        
1232                         conflict = brl_conflict_posix(exlock, &lock);
1233                 }
1234
1235                 if (conflict) {
1236                         *psmbpid = exlock->context.smbpid;
1237                         *pstart = exlock->start;
1238                         *psize = exlock->size;
1239                         *plock_type = exlock->lock_type;
1240                         return NT_STATUS_LOCK_NOT_GRANTED;
1241                 }
1242         }
1243
1244         /*
1245          * There is no lock held by an SMB daemon, check to
1246          * see if there is a POSIX lock from a UNIX or NFS process.
1247          */
1248
1249         if(lp_posix_locking(fsp->conn->params)) {
1250                 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1251
1252                 DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1253                         (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
1254                         fsp->fnum, fsp->fsp_name ));
1255
1256                 if (ret) {
1257                         /* Hmmm. No clue what to set smbpid to - use -1. */
1258                         *psmbpid = 0xFFFF;
1259                         return NT_STATUS_LOCK_NOT_GRANTED;
1260                 }
1261         }
1262
1263         return NT_STATUS_OK;
1264 }
1265
1266 /****************************************************************************
1267  Remove a particular pending lock.
1268 ****************************************************************************/
1269
1270 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1271                 uint32 smbpid,
1272                 struct server_id pid,
1273                 br_off start,
1274                 br_off size,
1275                 enum brl_flavour lock_flav)
1276 {
1277         unsigned int i;
1278         struct lock_struct *locks = br_lck->lock_data;
1279         struct lock_context context;
1280
1281         context.smbpid = smbpid;
1282         context.pid = pid;
1283         context.tid = br_lck->fsp->conn->cnum;
1284
1285         for (i = 0; i < br_lck->num_locks; i++) {
1286                 struct lock_struct *lock = &locks[i];
1287
1288                 /* For pending locks we *always* care about the fnum. */
1289                 if (brl_same_context(&lock->context, &context) &&
1290                                 lock->fnum == br_lck->fsp->fnum &&
1291                                 IS_PENDING_LOCK(lock->lock_type) &&
1292                                 lock->lock_flav == lock_flav &&
1293                                 lock->start == start &&
1294                                 lock->size == size) {
1295                         break;
1296                 }
1297         }
1298
1299         if (i == br_lck->num_locks) {
1300                 /* Didn't find it. */
1301                 return False;
1302         }
1303
1304         if (i < br_lck->num_locks - 1) {
1305                 /* Found this particular pending lock - delete it */
1306                 memmove(&locks[i], &locks[i+1], 
1307                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1308         }
1309
1310         br_lck->num_locks -= 1;
1311         br_lck->modified = True;
1312         return True;
1313 }
1314
1315 /****************************************************************************
1316  Remove any locks associated with an open file.
1317  Returns nothing. With POSIX locking on, if this process also holds locks via
1318  another fnum/tid, each lock is unlocked individually, not bulk-deleted.
1319 ****************************************************************************/
1320
1321 void brl_close_fnum(struct messaging_context *msg_ctx,
1322                     struct byte_range_lock *br_lck)
1323 {
1324         files_struct *fsp = br_lck->fsp;
1325         uint16 tid = fsp->conn->cnum;
1326         int fnum = fsp->fnum;
1327         unsigned int i, j, dcount=0;
1328         int num_deleted_windows_locks = 0;
1329         struct lock_struct *locks = br_lck->lock_data;
1330         struct server_id pid = procid_self();
1331         bool unlock_individually = False;
1332         bool posix_level2_contention_ended = false;
1333
1334         if(lp_posix_locking(fsp->conn->params)) {
1335
1336                 /* Check if there are any Windows locks associated with this dev/ino
1337                    pair that are not this fnum. If so we need to call unlock on each
1338                    one in order to release the system POSIX locks correctly. */
1339
1340                 for (i=0; i < br_lck->num_locks; i++) {
1341                         struct lock_struct *lock = &locks[i];
1342
1343                         if (!procid_equal(&lock->context.pid, &pid)) {
1344                                 continue;
1345                         }
1346
1347                         if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1348                                 continue; /* Ignore pending. */
1349                         }
1350
1351                         if (lock->context.tid != tid || lock->fnum != fnum) {
1352                                 unlock_individually = True;
1353                                 break;
1354                         }
1355                 }
1356
1357                 if (unlock_individually) {
1358                         struct lock_struct *locks_copy;
1359                         unsigned int num_locks_copy;
1360
1361                         /* Copy the current lock array. */
1362                         if (br_lck->num_locks) {
1363                                 locks_copy = (struct lock_struct *)TALLOC_MEMDUP(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1364                                 if (!locks_copy) {
1365                                         smb_panic("brl_close_fnum: talloc failed");
1366                                 }
1367                         } else {        
1368                                 locks_copy = NULL;
1369                         }
1370
1371                         num_locks_copy = br_lck->num_locks;
1372
1373                         for (i=0; i < num_locks_copy; i++) {
1374                                 struct lock_struct *lock = &locks_copy[i];
1375
1376                                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
1377                                                 (lock->fnum == fnum)) {
1378                                         brl_unlock(msg_ctx,
1379                                                 br_lck,
1380                                                 lock->context.smbpid,
1381                                                 pid,
1382                                                 lock->start,
1383                                                 lock->size,
1384                                                 lock->lock_flav);
1385                                 }
1386                         }
1387                         return;
1388                 }
1389         }
1390
1391         /* We can bulk delete - any POSIX locks will be removed when the fd closes. */
1392
1393         /* Remove any existing locks for this fnum (or any fnum if they're POSIX). */
1394
1395         for (i=0; i < br_lck->num_locks; i++) {
1396                 struct lock_struct *lock = &locks[i];
1397                 bool del_this_lock = False;
1398
1399                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) {
1400                         if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
1401                                 del_this_lock = True;
1402                                 num_deleted_windows_locks++;
1403                                 contend_level2_oplocks_end(br_lck->fsp,
1404                                     LEVEL2_CONTEND_WINDOWS_BRL);
1405                         } else if (lock->lock_flav == POSIX_LOCK) {
1406                                 del_this_lock = True;
1407
1408                                 /* Only end level2 contention once for posix */
1409                                 if (!posix_level2_contention_ended) {
1410                                         posix_level2_contention_ended = true;
1411                                         contend_level2_oplocks_end(br_lck->fsp,
1412                                             LEVEL2_CONTEND_POSIX_BRL);
1413                                 }
1414                         }
1415                 }
1416
1417                 if (del_this_lock) {
1418                         /* Send unlock messages to any pending waiters that overlap. */
1419                         for (j=0; j < br_lck->num_locks; j++) {
1420                                 struct lock_struct *pend_lock = &locks[j];
1421
1422                                 /* Ignore our own or non-pending locks. */
1423                                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1424                                         continue;
1425                                 }
1426
1427                                 /* Optimisation - don't send to this fnum as we're
1428                                    closing it. */
1429                                 if (pend_lock->context.tid == tid &&
1430                                     procid_equal(&pend_lock->context.pid, &pid) &&
1431                                     pend_lock->fnum == fnum) {
1432                                         continue;
1433                                 }
1434
1435                                 /* We could send specific lock info here... */
1436                                 if (brl_pending_overlap(lock, pend_lock)) {
1437                                         messaging_send(msg_ctx, pend_lock->context.pid,
1438                                                        MSG_SMB_UNLOCK, &data_blob_null);
1439                                 }
1440                         }
1441
1442                         /* found it - delete it */
1443                         if (br_lck->num_locks > 1 && i < br_lck->num_locks - 1) {
1444                                 memmove(&locks[i], &locks[i+1], 
1445                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1446                         }
1447                         br_lck->num_locks--;
1448                         br_lck->modified = True;
1449                         i--;
1450                         dcount++;
1451                 }
1452         }
1453
1454         if(lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) {
1455                 /* Reduce the Windows lock POSIX reference count on this dev/ino pair. */
1456                 reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks);
1457         }
1458 }
1459
1460 /****************************************************************************
1461  Ensure this set of lock entries is valid.
1462 ****************************************************************************/
1463
1464 static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
1465 {
1466         unsigned int i;
1467         unsigned int num_valid_entries = 0;
1468         struct lock_struct *locks = *pplocks;
1469
1470         for (i = 0; i < *pnum_entries; i++) {
1471                 struct lock_struct *lock_data = &locks[i];
1472                 if (!process_exists(lock_data->context.pid)) {
1473                         /* This process no longer exists - mark this
1474                            entry as invalid by zeroing it. */
1475                         ZERO_STRUCTP(lock_data);
1476                 } else {
1477                         num_valid_entries++;
1478                 }
1479         }
1480
1481         if (num_valid_entries != *pnum_entries) {
1482                 struct lock_struct *new_lock_data = NULL;
1483
1484                 if (num_valid_entries) {
1485                         new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
1486                         if (!new_lock_data) {
1487                                 DEBUG(3, ("malloc fail\n"));
1488                                 return False;
1489                         }
1490
1491                         num_valid_entries = 0;
1492                         for (i = 0; i < *pnum_entries; i++) {
1493                                 struct lock_struct *lock_data = &locks[i];
1494                                 if (lock_data->context.smbpid &&
1495                                                 lock_data->context.tid) {
1496                                         /* Valid (nonzero) entry - copy it. */
1497                                         memcpy(&new_lock_data[num_valid_entries],
1498                                                 lock_data, sizeof(struct lock_struct));
1499                                         num_valid_entries++;
1500                                 }
1501                         }
1502                 }
1503
1504                 SAFE_FREE(*pplocks);
1505                 *pplocks = new_lock_data;
1506                 *pnum_entries = num_valid_entries;
1507         }
1508
1509         return True;
1510 }
1511
1512 struct brl_forall_cb {
1513         void (*fn)(struct file_id id, struct server_id pid,
1514                    enum brl_type lock_type,
1515                    enum brl_flavour lock_flav,
1516                    br_off start, br_off size,
1517                    void *private_data);
1518         void *private_data;
1519 };
1520
1521 /****************************************************************************
1522  Traverse the whole database with this function, calling traverse_callback
1523  on each lock.
1524 ****************************************************************************/
1525
1526 static int traverse_fn(struct db_record *rec, void *state)
1527 {
1528         struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1529         struct lock_struct *locks;
1530         struct file_id *key;
1531         unsigned int i;
1532         unsigned int num_locks = 0;
1533         unsigned int orig_num_locks = 0;
1534
1535         /* In a traverse function we must make a copy of
1536            dbuf before modifying it. */
1537
1538         locks = (struct lock_struct *)memdup(rec->value.dptr,
1539                                              rec->value.dsize);
1540         if (!locks) {
1541                 return -1; /* Terminate traversal. */
1542         }
1543
1544         key = (struct file_id *)rec->key.dptr;
1545         orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks);
1546
1547         /* Ensure the lock db is clean of entries from invalid processes. */
1548
1549         if (!validate_lock_entries(&num_locks, &locks)) {
1550                 SAFE_FREE(locks);
1551                 return -1; /* Terminate traversal */
1552         }
1553
1554         if (orig_num_locks != num_locks) {
1555                 if (num_locks) {
1556                         TDB_DATA data;
1557                         data.dptr = (uint8_t *)locks;
1558                         data.dsize = num_locks*sizeof(struct lock_struct);
1559                         rec->store(rec, data, TDB_REPLACE);
1560                 } else {
1561                         rec->delete_rec(rec);
1562                 }
1563         }
1564
1565         if (cb->fn) {
1566                 for ( i=0; i<num_locks; i++) {
1567                         cb->fn(*key,
1568                                 locks[i].context.pid,
1569                                 locks[i].lock_type,
1570                                 locks[i].lock_flav,
1571                                 locks[i].start,
1572                                 locks[i].size,
1573                                 cb->private_data);
1574                 }
1575         }
1576
1577         SAFE_FREE(locks);
1578         return 0;
1579 }
1580
1581 /*******************************************************************
1582  Call the specified function on each lock in the database.
1583 ********************************************************************/
1584
1585 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1586                           enum brl_type lock_type,
1587                           enum brl_flavour lock_flav,
1588                           br_off start, br_off size,
1589                           void *private_data),
1590                void *private_data)
1591 {
1592         struct brl_forall_cb cb;
1593
1594         if (!brlock_db) {
1595                 return 0;
1596         }
1597         cb.fn = fn;
1598         cb.private_data = private_data;
1599         return brlock_db->traverse(brlock_db, traverse_fn, &cb);
1600 }
1601
1602 /*******************************************************************
1603  Store a potentially modified set of byte range lock data back into
1604  the database.
1605  Unlock the record.
1606 ********************************************************************/
1607
1608 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1609 {
1610         if (br_lck->read_only) {
1611                 SMB_ASSERT(!br_lck->modified);
1612         }
1613
1614         if (!br_lck->modified) {
1615                 goto done;
1616         }
1617
1618         if (br_lck->num_locks == 0) {
1619                 /* No locks - delete this entry. */
1620                 NTSTATUS status = br_lck->record->delete_rec(br_lck->record);
1621                 if (!NT_STATUS_IS_OK(status)) {
1622                         DEBUG(0, ("delete_rec returned %s\n",
1623                                   nt_errstr(status)));
1624                         smb_panic("Could not delete byte range lock entry");
1625                 }
1626         } else {
1627                 TDB_DATA data;
1628                 NTSTATUS status;
1629
1630                 data.dptr = (uint8 *)br_lck->lock_data;
1631                 data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
1632
1633                 status = br_lck->record->store(br_lck->record, data,
1634                                                TDB_REPLACE);
1635                 if (!NT_STATUS_IS_OK(status)) {
1636                         DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1637                         smb_panic("Could not store byte range mode entry");
1638                 }
1639         }
1640
1641  done:
1642
1643         SAFE_FREE(br_lck->lock_data);
1644         TALLOC_FREE(br_lck->record);
1645         return 0;
1646 }
1647
1648 /*******************************************************************
1649  Fetch a set of byte range lock data from the database.
1650  Leave the record locked.
1651  TALLOC_FREE(brl) will release the lock in the destructor.
1652 ********************************************************************/
1653
1654 static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
1655                                         files_struct *fsp, bool read_only)
1656 {
1657         TDB_DATA key, data;
1658         struct byte_range_lock *br_lck = TALLOC_P(mem_ctx, struct byte_range_lock);
1659
1660         if (br_lck == NULL) {
1661                 return NULL;
1662         }
1663
1664         br_lck->fsp = fsp;
1665         br_lck->num_locks = 0;
1666         br_lck->modified = False;
1667         memset(&br_lck->key, '\0', sizeof(struct file_id));
1668         br_lck->key = fsp->file_id;
1669
1670         key.dptr = (uint8 *)&br_lck->key;
1671         key.dsize = sizeof(struct file_id);
1672
1673         if (!fsp->lockdb_clean) {
1674                 /* We must be read/write to clean
1675                    the dead entries. */
1676                 read_only = False;
1677         }
1678
1679         if (read_only) {
1680                 if (brlock_db->fetch(brlock_db, br_lck, key, &data) == -1) {
1681                         DEBUG(3, ("Could not fetch byte range lock record\n"));
1682                         TALLOC_FREE(br_lck);
1683                         return NULL;
1684                 }
1685                 br_lck->record = NULL;
1686         }
1687         else {
1688                 br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key);
1689
1690                 if (br_lck->record == NULL) {
1691                         DEBUG(3, ("Could not lock byte range lock entry\n"));
1692                         TALLOC_FREE(br_lck);
1693                         return NULL;
1694                 }
1695
1696                 data = br_lck->record->value;
1697         }
1698
1699         br_lck->read_only = read_only;
1700         br_lck->lock_data = NULL;
1701
1702         talloc_set_destructor(br_lck, byte_range_lock_destructor);
1703
1704         br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1705
1706         if (br_lck->num_locks != 0) {
1707                 br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
1708                                                      br_lck->num_locks);
1709                 if (br_lck->lock_data == NULL) {
1710                         DEBUG(0, ("malloc failed\n"));
1711                         TALLOC_FREE(br_lck);
1712                         return NULL;
1713                 }
1714
1715                 memcpy(br_lck->lock_data, data.dptr, data.dsize);
1716         }
1717         
1718         if (!fsp->lockdb_clean) {
1719                 int orig_num_locks = br_lck->num_locks;
1720
1721                 /* This is the first time we've accessed this. */
1722                 /* Go through and ensure all entries exist - remove any that don't. */
1723                 /* Makes the lockdb self cleaning at low cost. */
1724
1725                 if (!validate_lock_entries(&br_lck->num_locks,
1726                                            &br_lck->lock_data)) {
1727                         SAFE_FREE(br_lck->lock_data);
1728                         TALLOC_FREE(br_lck);
1729                         return NULL;
1730                 }
1731
1732                 /* Ensure invalid locks are cleaned up in the destructor. */
1733                 if (orig_num_locks != br_lck->num_locks) {
1734                         br_lck->modified = True;
1735                 }
1736
1737                 /* Mark the lockdb as "clean" as seen from this open file. */
1738                 fsp->lockdb_clean = True;
1739         }
1740
1741         if (DEBUGLEVEL >= 10) {
1742                 unsigned int i;
1743                 struct lock_struct *locks = br_lck->lock_data;
1744                 DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
1745                         br_lck->num_locks,
1746                           file_id_string_tos(&fsp->file_id)));
1747                 for( i = 0; i < br_lck->num_locks; i++) {
1748                         print_lock_struct(i, &locks[i]);
1749                 }
1750         }
1751         return br_lck;
1752 }
1753
1754 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
1755                                         files_struct *fsp)
1756 {
1757         return brl_get_locks_internal(mem_ctx, fsp, False);
1758 }
1759
1760 struct byte_range_lock *brl_get_locks_readonly(TALLOC_CTX *mem_ctx,
1761                                         files_struct *fsp)
1762 {
1763         return brl_get_locks_internal(mem_ctx, fsp, True);
1764 }
1765
1766 struct brl_revalidate_state {
1767         ssize_t array_size;
1768         uint32 num_pids;
1769         struct server_id *pids;
1770 };
1771
1772 /*
1773  * Collect PIDs of all processes with pending entries
1774  */
1775
1776 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
1777                                    enum brl_type lock_type,
1778                                    enum brl_flavour lock_flav,
1779                                    br_off start, br_off size,
1780                                    void *private_data)
1781 {
1782         struct brl_revalidate_state *state =
1783                 (struct brl_revalidate_state *)private_data;
1784
1785         if (!IS_PENDING_LOCK(lock_type)) {
1786                 return;
1787         }
1788
1789         add_to_large_array(state, sizeof(pid), (void *)&pid,
1790                            &state->pids, &state->num_pids,
1791                            &state->array_size);
1792 }
1793
1794 /*
1795  * qsort callback to sort the processes
1796  */
1797
1798 static int compare_procids(const void *p1, const void *p2)
1799 {
1800         const struct server_id *i1 = (struct server_id *)p1;
1801         const struct server_id *i2 = (struct server_id *)p2;
1802
1803         if (i1->pid < i2->pid) return -1;
1804         if (i2->pid > i2->pid) return 1;
1805         return 0;
1806 }
1807
1808 /*
1809  * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
1810  * locks so that they retry. Mainly used in the cluster code after a node has
1811  * died.
1812  *
1813  * Done in two steps to avoid double-sends: First we collect all entries in an
1814  * array, then qsort that array and only send to non-dupes.
1815  */
1816
1817 static void brl_revalidate(struct messaging_context *msg_ctx,
1818                            void *private_data,
1819                            uint32_t msg_type,
1820                            struct server_id server_id,
1821                            DATA_BLOB *data)
1822 {
1823         struct brl_revalidate_state *state;
1824         uint32 i;
1825         struct server_id last_pid;
1826
1827         if (!(state = TALLOC_ZERO_P(NULL, struct brl_revalidate_state))) {
1828                 DEBUG(0, ("talloc failed\n"));
1829                 return;
1830         }
1831
1832         brl_forall(brl_revalidate_collect, state);
1833
1834         if (state->array_size == -1) {
1835                 DEBUG(0, ("talloc failed\n"));
1836                 goto done;
1837         }
1838
1839         if (state->num_pids == 0) {
1840                 goto done;
1841         }
1842
1843         qsort(state->pids, state->num_pids, sizeof(state->pids[0]),
1844               compare_procids);
1845
1846         ZERO_STRUCT(last_pid);
1847
1848         for (i=0; i<state->num_pids; i++) {
1849                 if (procid_equal(&last_pid, &state->pids[i])) {
1850                         /*
1851                          * We've seen that one already
1852                          */
1853                         continue;
1854                 }
1855
1856                 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
1857                                &data_blob_null);
1858                 last_pid = state->pids[i];
1859         }
1860
1861  done:
1862         TALLOC_FREE(state);
1863         return;
1864 }
1865
1866 void brl_register_msgs(struct messaging_context *msg_ctx)
1867 {
1868         messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
1869                            brl_revalidate);
1870 }