This patch looks bigger than it is. It does two things: 1) renames smbpid -> smblctx...
[mat/samba.git] / source3 / locking / brlock.c
1 /* 
2    Unix SMB/CIFS implementation.
3    byte range locking code
4    Updated to handle range splits/merges.
5
6    Copyright (C) Andrew Tridgell 1992-2000
7    Copyright (C) Jeremy Allison 1992-2000
8    
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 3 of the License, or
12    (at your option) any later version.
13    
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18    
19    You should have received a copy of the GNU General Public License
20    along with this program.  If not, see <http://www.gnu.org/licenses/>.
21 */
22
23 /* This module implements a tdb based byte range locking service,
24    replacing the fcntl() based byte range locking previously
25    used. This allows us to provide the same semantics as NT */
26
27 #include "includes.h"
28 #include "librpc/gen_ndr/messaging.h"
29
30 #undef DBGC_CLASS
31 #define DBGC_CLASS DBGC_LOCKING
32
33 #define ZERO_ZERO 0
34
35 /* The open brlock.tdb database. */
36
37 static struct db_context *brlock_db;
38
39 /****************************************************************************
40  Debug info at level 10 for lock struct.
41 ****************************************************************************/
42
43 static void print_lock_struct(unsigned int i, struct lock_struct *pls)
44 {
45         DEBUG(10,("[%u]: smblctx = %llu, tid = %u, pid = %s, ",
46                         i,
47                         (unsigned long long)pls->context.smblctx,
48                         (unsigned int)pls->context.tid,
49                         procid_str(talloc_tos(), &pls->context.pid) ));
50         
51         DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n",
52                 (double)pls->start,
53                 (double)pls->size,
54                 pls->fnum,
55                 lock_type_name(pls->lock_type),
56                 lock_flav_name(pls->lock_flav) ));
57 }
58
59 /****************************************************************************
60  See if two locking contexts are equal.
61 ****************************************************************************/
62
63 bool brl_same_context(const struct lock_context *ctx1, 
64                              const struct lock_context *ctx2)
65 {
66         return (procid_equal(&ctx1->pid, &ctx2->pid) &&
67                 (ctx1->smblctx == ctx2->smblctx) &&
68                 (ctx1->tid == ctx2->tid));
69 }
70
71 /****************************************************************************
72  See if lck1 and lck2 overlap.
73 ****************************************************************************/
74
75 static bool brl_overlap(const struct lock_struct *lck1,
76                         const struct lock_struct *lck2)
77 {
78         /* XXX Remove for Win7 compatibility. */
79         /* this extra check is not redundent - it copes with locks
80            that go beyond the end of 64 bit file space */
81         if (lck1->size != 0 &&
82             lck1->start == lck2->start &&
83             lck1->size == lck2->size) {
84                 return True;
85         }
86
87         if (lck1->start >= (lck2->start+lck2->size) ||
88             lck2->start >= (lck1->start+lck1->size)) {
89                 return False;
90         }
91         return True;
92 }
93
94 /****************************************************************************
95  See if lock2 can be added when lock1 is in place.
96 ****************************************************************************/
97
98 static bool brl_conflict(const struct lock_struct *lck1, 
99                          const struct lock_struct *lck2)
100 {
101         /* Ignore PENDING locks. */
102         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
103                 return False;
104
105         /* Read locks never conflict. */
106         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
107                 return False;
108         }
109
110         /* A READ lock can stack on top of a WRITE lock if they have the same
111          * context & fnum. */
112         if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
113             brl_same_context(&lck1->context, &lck2->context) &&
114             lck1->fnum == lck2->fnum) {
115                 return False;
116         }
117
118         return brl_overlap(lck1, lck2);
119
120
121 /****************************************************************************
122  See if lock2 can be added when lock1 is in place - when both locks are POSIX
123  flavour. POSIX locks ignore fnum - they only care about dev/ino which we
124  know already match.
125 ****************************************************************************/
126
127 static bool brl_conflict_posix(const struct lock_struct *lck1, 
128                                 const struct lock_struct *lck2)
129 {
130 #if defined(DEVELOPER)
131         SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
132         SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
133 #endif
134
135         /* Ignore PENDING locks. */
136         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
137                 return False;
138
139         /* Read locks never conflict. */
140         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
141                 return False;
142         }
143
144         /* Locks on the same context con't conflict. Ignore fnum. */
145         if (brl_same_context(&lck1->context, &lck2->context)) {
146                 return False;
147         }
148
149         /* One is read, the other write, or the context is different,
150            do they overlap ? */
151         return brl_overlap(lck1, lck2);
152
153
154 #if ZERO_ZERO
155 static bool brl_conflict1(const struct lock_struct *lck1, 
156                          const struct lock_struct *lck2)
157 {
158         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
159                 return False;
160
161         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
162                 return False;
163         }
164
165         if (brl_same_context(&lck1->context, &lck2->context) &&
166             lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
167                 return False;
168         }
169
170         if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
171                 return True;
172         }
173
174         if (lck1->start >= (lck2->start + lck2->size) ||
175             lck2->start >= (lck1->start + lck1->size)) {
176                 return False;
177         }
178             
179         return True;
180
181 #endif
182
183 /****************************************************************************
184  Check to see if this lock conflicts, but ignore our own locks on the
185  same fnum only. This is the read/write lock check code path.
186  This is never used in the POSIX lock case.
187 ****************************************************************************/
188
189 static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2)
190 {
191         if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
192                 return False;
193
194         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
195                 return False;
196
197         /* POSIX flavour locks never conflict here - this is only called
198            in the read/write path. */
199
200         if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK)
201                 return False;
202
203         /*
204          * Incoming WRITE locks conflict with existing READ locks even
205          * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
206          */
207
208         if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
209                 if (brl_same_context(&lck1->context, &lck2->context) &&
210                                         lck1->fnum == lck2->fnum)
211                         return False;
212         }
213
214         return brl_overlap(lck1, lck2);
215
216
217 /****************************************************************************
218  Check if an unlock overlaps a pending lock.
219 ****************************************************************************/
220
221 static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
222 {
223         if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
224                 return True;
225         if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
226                 return True;
227         return False;
228 }
229
230 /****************************************************************************
231  Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
232  is the same as this one and changes its error code. I wonder if any
233  app depends on this ?
234 ****************************************************************************/
235
236 NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock)
237 {
238         if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
239                 /* amazing the little things you learn with a test
240                    suite. Locks beyond this offset (as a 64 bit
241                    number!) always generate the conflict error code,
242                    unless the top bit is set */
243                 if (!blocking_lock) {
244                         fsp->last_lock_failure = *lock;
245                 }
246                 return NT_STATUS_FILE_LOCK_CONFLICT;
247         }
248
249         if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
250                         lock->context.tid == fsp->last_lock_failure.context.tid &&
251                         lock->fnum == fsp->last_lock_failure.fnum &&
252                         lock->start == fsp->last_lock_failure.start) {
253                 return NT_STATUS_FILE_LOCK_CONFLICT;
254         }
255
256         if (!blocking_lock) {
257                 fsp->last_lock_failure = *lock;
258         }
259         return NT_STATUS_LOCK_NOT_GRANTED;
260 }
261
262 /****************************************************************************
263  Open up the brlock.tdb database.
264 ****************************************************************************/
265
266 void brl_init(bool read_only)
267 {
268         int tdb_flags;
269
270         if (brlock_db) {
271                 return;
272         }
273
274         tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST;
275
276         if (!lp_clustering()) {
277                 /*
278                  * We can't use the SEQNUM trick to cache brlock
279                  * entries in the clustering case because ctdb seqnum
280                  * propagation has a delay.
281                  */
282                 tdb_flags |= TDB_SEQNUM;
283         }
284
285         brlock_db = db_open(NULL, lock_path("brlock.tdb"),
286                             lp_open_files_db_hash_size(), tdb_flags,
287                             read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 );
288         if (!brlock_db) {
289                 DEBUG(0,("Failed to open byte range locking database %s\n",
290                         lock_path("brlock.tdb")));
291                 return;
292         }
293 }
294
295 /****************************************************************************
296  Close down the brlock.tdb database.
297 ****************************************************************************/
298
/* Close down the brlock database. TALLOC_FREE also resets brlock_db to
   NULL, so a later brl_init() call can reopen it. */
void brl_shutdown(void)
{
	TALLOC_FREE(brlock_db);
}
303
304 #if ZERO_ZERO
305 /****************************************************************************
306  Compare two locks for sorting.
307 ****************************************************************************/
308
309 static int lock_compare(const struct lock_struct *lck1, 
310                          const struct lock_struct *lck2)
311 {
312         if (lck1->start != lck2->start) {
313                 return (lck1->start - lck2->start);
314         }
315         if (lck2->size != lck1->size) {
316                 return ((int)lck1->size - (int)lck2->size);
317         }
318         return 0;
319 }
320 #endif
321
322 /****************************************************************************
323  Lock a range of bytes - Windows lock semantics.
324 ****************************************************************************/
325
/* Lock a range of bytes with Windows semantics: scan the existing lock
   array for conflicts, optionally map the lock down to a system POSIX
   lock, then append it to br_lck->lock_data. On conflict the smblctx of
   the blocker is written back into plock->context.smblctx. */
NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
    struct lock_struct *plock, bool blocking_lock)
{
	unsigned int i;
	files_struct *fsp = br_lck->fsp;
	struct lock_struct *locks = br_lck->lock_data;
	NTSTATUS status;

	SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);

	/* Reject ranges whose end wraps past 64-bit file space
	   (zero-length locks are explicitly allowed). */
	if ((plock->start + plock->size - 1 < plock->start) &&
			plock->size != 0) {
		return NT_STATUS_INVALID_LOCK_RANGE;
	}

	for (i=0; i < br_lck->num_locks; i++) {
		/* Do any Windows or POSIX locks conflict ? */
		if (brl_conflict(&locks[i], plock)) {
			/* Remember who blocked us. */
			plock->context.smblctx = locks[i].context.smblctx;
			return brl_lock_failed(fsp,plock,blocking_lock);
		}
#if ZERO_ZERO
		if (plock->start == 0 && plock->size == 0 && 
				locks[i].size == 0) {
			break;
		}
#endif
	}

	/* Pending (blocking-queue) locks don't contend oplocks. */
	if (!IS_PENDING_LOCK(plock->lock_type)) {
		contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
	}

	/* We can get the Windows lock, now see if it needs to
	   be mapped into a lower level POSIX one, and if so can
	   we get it ? */

	if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
		int errno_ret;
		if (!set_posix_lock_windows_flavour(fsp,
				plock->start,
				plock->size,
				plock->lock_type,
				&plock->context,
				locks,
				br_lck->num_locks,
				&errno_ret)) {

			/* We don't know who blocked us. */
			plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;

			if (errno_ret == EACCES || errno_ret == EAGAIN) {
				status = NT_STATUS_FILE_LOCK_CONFLICT;
				goto fail;
			} else {
				status = map_nt_error_from_unix(errno);
				goto fail;
			}
		}
	}

	/* no conflicts - add it to the list of locks */
	locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
	if (!locks) {
		status = NT_STATUS_NO_MEMORY;
		goto fail;
	}

	memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
	br_lck->num_locks += 1;
	br_lck->lock_data = locks;
	br_lck->modified = True;

	return NT_STATUS_OK;
 fail:
	/* Undo the oplock contention begun above before reporting failure. */
	if (!IS_PENDING_LOCK(plock->lock_type)) {
		contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
	}
	return status;
}
407
408 /****************************************************************************
409  Cope with POSIX range splits and merges.
410 ****************************************************************************/
411
/* Cope with POSIX range splits and merges. Compares one existing lock
   (ex) against the proposed lock (plock). Returns the number of
   fragments of ex written to lck_arr (0, 1 or 2) - the caller must
   provide room for up to 2 entries per existing lock. May grow plock in
   place when adjacent/overlapping same-type ranges are merged, and a
   return of 0 means ex has been fully absorbed into (or replaced by)
   plock. */
static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,	/* Output array. */
						struct lock_struct *ex,		/* existing lock. */
						struct lock_struct *plock)	/* proposed lock. */
{
	bool lock_types_differ = (ex->lock_type != plock->lock_type);

	/* We can't merge non-conflicting locks on different context - ignore fnum. */

	if (!brl_same_context(&ex->context, &plock->context)) {
		/* Just copy. */
		memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
		return 1;
	}

	/* We now know we have the same context. */

	/* Did we overlap ? */

/*********************************************
                                        +---------+
                                        | ex      |
                                        +---------+
                         +-------+
                         | plock |
                         +-------+
OR....
        +---------+
        |  ex     |
        +---------+
**********************************************/

	/* Strict > : ranges that merely touch are NOT disjoint here -
	   adjacency is handled by the merge cases below. */
	if ( (ex->start > (plock->start + plock->size)) ||
		(plock->start > (ex->start + ex->size))) {

		/* No overlap with this lock - copy existing. */

		memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
		return 1;
	}

/*********************************************
        +---------------------------+
        |          ex               |
        +---------------------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+
OR
             +---------------+
             |       ex      |
             +---------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+

**********************************************/

	if ( (ex->start >= plock->start) &&
		(ex->start + ex->size <= plock->start + plock->size) ) {

		/* Replace - discard existing lock. */

		return 0;
	}

/*********************************************
Adjacent after.
                        +-------+
                        |  ex   |
                        +-------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

	if (plock->start + plock->size == ex->start) {

		/* If the lock types are the same, we merge, if different, we
		   add the remainder of the old lock. */

		if (lock_types_differ) {
			/* Add existing. */
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. */
			plock->size += ex->size;
			return 0;
		}
	}

/*********************************************
Adjacent before.
        +-------+
        |  ex   |
        +-------+
                +---------------+
                |   plock       |
                +---------------+
BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

	if (ex->start + ex->size == plock->start) {

		/* If the lock types are the same, we merge, if different, we
		   add the existing lock. */

		if (lock_types_differ) {
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. */
			plock->start = ex->start;
			plock->size += ex->size;
			return 0;
		}
	}

/*********************************************
Overlap after.
        +-----------------------+
        |          ex           |
        +-----------------------+
        +---------------+
        |   plock       |
        +---------------+
OR
               +----------------+
               |       ex       |
               +----------------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

	if ( (ex->start >= plock->start) &&
		(ex->start <= plock->start + plock->size) &&
		(ex->start + ex->size > plock->start + plock->size) ) {

		/* If the lock types are the same, we merge, if different, we
		   add the remainder of the old lock. */

		if (lock_types_differ) {
			/* Add remaining existing. */
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			/* Adjust existing start and size. */
			lck_arr[0].start = plock->start + plock->size;
			lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. */
			plock->size += (ex->start + ex->size) - (plock->start + plock->size);
			return 0;
		}
	}

/*********************************************
Overlap before.
        +-----------------------+
        |  ex                   |
        +-----------------------+
                +---------------+
                |   plock       |
                +---------------+
OR
        +-------------+
        |  ex         |
        +-------------+
                +---------------+
                |   plock       |
                +---------------+

BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

	if ( (ex->start < plock->start) &&
			(ex->start + ex->size >= plock->start) &&
			(ex->start + ex->size <= plock->start + plock->size) ) {

		/* If the lock types are the same, we merge, if different, we
		   add the truncated old lock. */

		if (lock_types_differ) {
			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			/* Adjust existing size. */
			lck_arr[0].size = plock->start - ex->start;
			return 1;
		} else {
			/* Merge - adjust incoming lock as we may have more
			 * merging to come. MUST ADJUST plock SIZE FIRST ! */
			plock->size += (plock->start - ex->start);
			plock->start = ex->start;
			return 0;
		}
	}

/*********************************************
Complete overlap.
        +---------------------------+
        |        ex                 |
        +---------------------------+
                +---------+
                |  plock  |
                +---------+
BECOMES.....
        +-------+---------+---------+
        | ex    |  plock  | ex      | - different lock types.
        +-------+---------+---------+
OR
        +---------------------------+
        |        plock              | - same lock type.
        +---------------------------+
**********************************************/

	if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {

		if (lock_types_differ) {

			/* We have to split ex into two locks here. */

			memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
			memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));

			/* Adjust first existing size. */
			lck_arr[0].size = plock->start - ex->start;

			/* Adjust second existing start and size. */
			lck_arr[1].start = plock->start + plock->size;
			lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
			return 2;
		} else {
			/* Just eat the existing locks, merge them into plock. */
			plock->start = ex->start;
			plock->size = ex->size;
			return 0;
		}
	}

	/* Never get here. */
	smb_panic("brlock_posix_split_merge");
	/* Notreached. */

	/* Keep some compilers happy. */
	return 0;
}
696
697 /****************************************************************************
698  Lock a range of bytes - POSIX lock semantics.
699  We must cope with range splits and merges.
700 ****************************************************************************/
701
702 static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
703                                struct byte_range_lock *br_lck,
704                                struct lock_struct *plock)
705 {
706         unsigned int i, count, posix_count;
707         struct lock_struct *locks = br_lck->lock_data;
708         struct lock_struct *tp;
709         bool signal_pending_read = False;
710         bool break_oplocks = false;
711         NTSTATUS status;
712
713         /* No zero-zero locks for POSIX. */
714         if (plock->start == 0 && plock->size == 0) {
715                 return NT_STATUS_INVALID_PARAMETER;
716         }
717
718         /* Don't allow 64-bit lock wrap. */
719         if (plock->start + plock->size - 1 < plock->start) {
720                 return NT_STATUS_INVALID_PARAMETER;
721         }
722
723         /* The worst case scenario here is we have to split an
724            existing POSIX lock range into two, and add our lock,
725            so we need at most 2 more entries. */
726
727         tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2));
728         if (!tp) {
729                 return NT_STATUS_NO_MEMORY;
730         }
731
732         count = posix_count = 0;
733
734         for (i=0; i < br_lck->num_locks; i++) {
735                 struct lock_struct *curr_lock = &locks[i];
736
737                 /* If we have a pending read lock, a lock downgrade should
738                    trigger a lock re-evaluation. */
739                 if (curr_lock->lock_type == PENDING_READ_LOCK &&
740                                 brl_pending_overlap(plock, curr_lock)) {
741                         signal_pending_read = True;
742                 }
743
744                 if (curr_lock->lock_flav == WINDOWS_LOCK) {
745                         /* Do any Windows flavour locks conflict ? */
746                         if (brl_conflict(curr_lock, plock)) {
747                                 /* No games with error messages. */
748                                 SAFE_FREE(tp);
749                                 /* Remember who blocked us. */
750                                 plock->context.smblctx = curr_lock->context.smblctx;
751                                 return NT_STATUS_FILE_LOCK_CONFLICT;
752                         }
753                         /* Just copy the Windows lock into the new array. */
754                         memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
755                         count++;
756                 } else {
757                         unsigned int tmp_count = 0;
758
759                         /* POSIX conflict semantics are different. */
760                         if (brl_conflict_posix(curr_lock, plock)) {
761                                 /* Can't block ourselves with POSIX locks. */
762                                 /* No games with error messages. */
763                                 SAFE_FREE(tp);
764                                 /* Remember who blocked us. */
765                                 plock->context.smblctx = curr_lock->context.smblctx;
766                                 return NT_STATUS_FILE_LOCK_CONFLICT;
767                         }
768
769                         /* Work out overlaps. */
770                         tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
771                         posix_count += tmp_count;
772                         count += tmp_count;
773                 }
774         }
775
776         /*
777          * Break oplocks while we hold a brl. Since lock() and unlock() calls
778          * are not symetric with POSIX semantics, we cannot guarantee our
779          * contend_level2_oplocks_begin/end calls will be acquired and
780          * released one-for-one as with Windows semantics. Therefore we only
781          * call contend_level2_oplocks_begin if this is the first POSIX brl on
782          * the file.
783          */
784         break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
785                          posix_count == 0);
786         if (break_oplocks) {
787                 contend_level2_oplocks_begin(br_lck->fsp,
788                                              LEVEL2_CONTEND_POSIX_BRL);
789         }
790
791         /* Try and add the lock in order, sorted by lock start. */
792         for (i=0; i < count; i++) {
793                 struct lock_struct *curr_lock = &tp[i];
794
795                 if (curr_lock->start <= plock->start) {
796                         continue;
797                 }
798         }
799
800         if (i < count) {
801                 memmove(&tp[i+1], &tp[i],
802                         (count - i)*sizeof(struct lock_struct));
803         }
804         memcpy(&tp[i], plock, sizeof(struct lock_struct));
805         count++;
806
807         /* We can get the POSIX lock, now see if it needs to
808            be mapped into a lower level POSIX one, and if so can
809            we get it ? */
810
811         if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
812                 int errno_ret;
813
814                 /* The lower layer just needs to attempt to
815                    get the system POSIX lock. We've weeded out
816                    any conflicts above. */
817
818                 if (!set_posix_lock_posix_flavour(br_lck->fsp,
819                                 plock->start,
820                                 plock->size,
821                                 plock->lock_type,
822                                 &errno_ret)) {
823
824                         /* We don't know who blocked us. */
825                         plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;
826
827                         if (errno_ret == EACCES || errno_ret == EAGAIN) {
828                                 SAFE_FREE(tp);
829                                 status = NT_STATUS_FILE_LOCK_CONFLICT;
830                                 goto fail;
831                         } else {
832                                 SAFE_FREE(tp);
833                                 status = map_nt_error_from_unix(errno);
834                                 goto fail;
835                         }
836                 }
837         }
838
839         /* If we didn't use all the allocated size,
840          * Realloc so we don't leak entries per lock call. */
841         if (count < br_lck->num_locks + 2) {
842                 tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
843                 if (!tp) {
844                         status = NT_STATUS_NO_MEMORY;
845                         goto fail;
846                 }
847         }
848
849         br_lck->num_locks = count;
850         SAFE_FREE(br_lck->lock_data);
851         br_lck->lock_data = tp;
852         locks = tp;
853         br_lck->modified = True;
854
855         /* A successful downgrade from write to read lock can trigger a lock
856            re-evalutation where waiting readers can now proceed. */
857
858         if (signal_pending_read) {
859                 /* Send unlock messages to any pending read waiters that overlap. */
860                 for (i=0; i < br_lck->num_locks; i++) {
861                         struct lock_struct *pend_lock = &locks[i];
862
863                         /* Ignore non-pending locks. */
864                         if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
865                                 continue;
866                         }
867
868                         if (pend_lock->lock_type == PENDING_READ_LOCK &&
869                                         brl_pending_overlap(plock, pend_lock)) {
870                                 DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
871                                         procid_str_static(&pend_lock->context.pid )));
872
873                                 messaging_send(msg_ctx, pend_lock->context.pid,
874                                                MSG_SMB_UNLOCK, &data_blob_null);
875                         }
876                 }
877         }
878
879         return NT_STATUS_OK;
880  fail:
881         if (break_oplocks) {
882                 contend_level2_oplocks_end(br_lck->fsp,
883                                            LEVEL2_CONTEND_POSIX_BRL);
884         }
885         return status;
886 }
887
/*
 * VFS dispatcher for Windows-flavour byte-range locks.
 *
 * VFS_FIND(brl_lock_windows) resolves the handle whose fns table provides
 * the brl_lock_windows operation; the call is then forwarded unchanged.
 * Returns the NTSTATUS of the underlying implementation.
 */
NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
				       struct byte_range_lock *br_lck,
				       struct lock_struct *plock,
				       bool blocking_lock,
				       struct blocking_lock_record *blr)
{
	VFS_FIND(brl_lock_windows);
	return handle->fns->brl_lock_windows(handle, br_lck, plock,
					     blocking_lock, blr);
}
898
899 /****************************************************************************
900  Lock a range of bytes.
901 ****************************************************************************/
902
903 NTSTATUS brl_lock(struct messaging_context *msg_ctx,
904                 struct byte_range_lock *br_lck,
905                 uint64_t smblctx,
906                 struct server_id pid,
907                 br_off start,
908                 br_off size, 
909                 enum brl_type lock_type,
910                 enum brl_flavour lock_flav,
911                 bool blocking_lock,
912                 uint64_t *psmblctx,
913                 struct blocking_lock_record *blr)
914 {
915         NTSTATUS ret;
916         struct lock_struct lock;
917
918 #if !ZERO_ZERO
919         if (start == 0 && size == 0) {
920                 DEBUG(0,("client sent 0/0 lock - please report this\n"));
921         }
922 #endif
923
924 #ifdef DEVELOPER
925         /* Quieten valgrind on test. */
926         memset(&lock, '\0', sizeof(lock));
927 #endif
928
929         lock.context.smblctx = smblctx;
930         lock.context.pid = pid;
931         lock.context.tid = br_lck->fsp->conn->cnum;
932         lock.start = start;
933         lock.size = size;
934         lock.fnum = br_lck->fsp->fnum;
935         lock.lock_type = lock_type;
936         lock.lock_flav = lock_flav;
937
938         if (lock_flav == WINDOWS_LOCK) {
939                 ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
940                     &lock, blocking_lock, blr);
941         } else {
942                 ret = brl_lock_posix(msg_ctx, br_lck, &lock);
943         }
944
945 #if ZERO_ZERO
946         /* sort the lock list */
947         TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare);
948 #endif
949
950         /* If we're returning an error, return who blocked us. */
951         if (!NT_STATUS_IS_OK(ret) && psmblctx) {
952                 *psmblctx = lock.context.smblctx;
953         }
954         return ret;
955 }
956
957 /****************************************************************************
958  Unlock a range of bytes - Windows semantics.
959 ****************************************************************************/
960
bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
			       struct byte_range_lock *br_lck,
			       const struct lock_struct *plock)
{
	/* Remove one Windows-flavour lock that exactly matches plock
	 * (same context, fnum, start and size). Returns True if a lock
	 * was found and deleted, False otherwise. */
	unsigned int i, j;
	struct lock_struct *locks = br_lck->lock_data;
	enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */

	SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);

#if ZERO_ZERO
	/* Delete write locks by preference... The lock list
	   is sorted in the zero zero case. */

	for (i = 0; i < br_lck->num_locks; i++) {
		struct lock_struct *lock = &locks[i];

		if (lock->lock_type == WRITE_LOCK &&
		    brl_same_context(&lock->context, &plock->context) &&
		    lock->fnum == plock->fnum &&
		    lock->lock_flav == WINDOWS_LOCK &&
		    lock->start == plock->start &&
		    lock->size == plock->size) {

			/* found it - delete it */
			deleted_lock_type = lock->lock_type;
			break;
		}
	}

	if (i != br_lck->num_locks) {
		/* We found it - don't search again. */
		goto unlock_continue;
	}
#endif

	for (i = 0; i < br_lck->num_locks; i++) {
		struct lock_struct *lock = &locks[i];

		/* Pending (blocked) entries are never removed by an unlock. */
		if (IS_PENDING_LOCK(lock->lock_type)) {
			continue;
		}

		/* Only remove our own locks that match in start, size, and flavour. */
		if (brl_same_context(&lock->context, &plock->context) &&
					lock->fnum == plock->fnum &&
					lock->lock_flav == WINDOWS_LOCK &&
					lock->start == plock->start &&
					lock->size == plock->size ) {
			deleted_lock_type = lock->lock_type;
			break;
		}
	}

	if (i == br_lck->num_locks) {
		/* we didn't find it */
		return False;
	}

#if ZERO_ZERO
  unlock_continue:
#endif

	/* Actually delete the lock. */
	if (i < br_lck->num_locks - 1) {
		memmove(&locks[i], &locks[i+1], 
			sizeof(*locks)*((br_lck->num_locks-1) - i));
	}

	br_lck->num_locks -= 1;
	br_lck->modified = True;

	/* Unlock the underlying POSIX regions. */
	if(lp_posix_locking(br_lck->fsp->conn->params)) {
		release_posix_lock_windows_flavour(br_lck->fsp,
				plock->start,
				plock->size,
				deleted_lock_type,
				&plock->context,
				locks,
				br_lck->num_locks);
	}

	/* Send unlock messages to any pending waiters that overlap. */
	for (j=0; j < br_lck->num_locks; j++) {
		struct lock_struct *pend_lock = &locks[j];

		/* Ignore non-pending locks. */
		if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
			continue;
		}

		/* We could send specific lock info here... */
		if (brl_pending_overlap(plock, pend_lock)) {
			DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
				procid_str_static(&pend_lock->context.pid )));

			/* Wake the blocked waiter so it can retry its lock. */
			messaging_send(msg_ctx, pend_lock->context.pid,
				       MSG_SMB_UNLOCK, &data_blob_null);
		}
	}

	contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
	return True;
}
1066
1067 /****************************************************************************
1068  Unlock a range of bytes - POSIX semantics.
1069 ****************************************************************************/
1070
static bool brl_unlock_posix(struct messaging_context *msg_ctx,
			     struct byte_range_lock *br_lck,
			     struct lock_struct *plock)
{
	/* Remove the range [plock->start, plock->start + plock->size) from
	 * this context's POSIX locks, splitting or shrinking existing
	 * entries as needed. Returns True on success (including the
	 * no-overlap no-op case), False on error. */
	unsigned int i, j, count;
	struct lock_struct *tp;
	struct lock_struct *locks = br_lck->lock_data;
	bool overlap_found = False;

	/* No zero-zero locks for POSIX. */
	if (plock->start == 0 && plock->size == 0) {
		return False;
	}

	/* Don't allow 64-bit lock wrap. */
	if (plock->start + plock->size < plock->start ||
			plock->start + plock->size < plock->size) {
		DEBUG(10,("brl_unlock_posix: lock wrap\n"));
		return False;
	}

	/* The worst case scenario here is we have to split an
	   existing POSIX lock range into two, so we need at most
	   1 more entry. */

	tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1));
	if (!tp) {
		DEBUG(10,("brl_unlock_posix: malloc fail\n"));
		return False;
	}

	/* Rebuild the lock array into tp, applying the unlock as we go. */
	count = 0;
	for (i = 0; i < br_lck->num_locks; i++) {
		struct lock_struct *lock = &locks[i];
		unsigned int tmp_count;

		/* Only remove our own locks - ignore fnum. */
		if (IS_PENDING_LOCK(lock->lock_type) ||
				!brl_same_context(&lock->context, &plock->context)) {
			memcpy(&tp[count], lock, sizeof(struct lock_struct));
			count++;
			continue;
		}

		if (lock->lock_flav == WINDOWS_LOCK) {
			/* Do any Windows flavour locks conflict ? */
			if (brl_conflict(lock, plock)) {
				SAFE_FREE(tp);
				return false;
			}
			/* Just copy the Windows lock into the new array. */
			memcpy(&tp[count], lock, sizeof(struct lock_struct));
			count++;
			continue;
		}

		/* Work out overlaps. brlock_posix_split_merge() writes 0, 1
		   or 2 entries at &tp[count] and returns how many. */
		tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);

		if (tmp_count == 0) {
			/* plock overlapped the existing lock completely,
			   or replaced it. Don't copy the existing lock. */
			overlap_found = true;
		} else if (tmp_count == 1) {
			/* Either no overlap, (simple copy of existing lock) or
			 * an overlap of an existing lock. */
			/* If the lock changed size, we had an overlap. */
			if (tp[count].size != lock->size) {
				overlap_found = true;
			}
			count += tmp_count;
		} else if (tmp_count == 2) {
			/* We split a lock range in two. */
			overlap_found = true;
			count += tmp_count;

			/* Optimisation... */
			/* We know we're finished here as we can't overlap any
			   more POSIX locks. Copy the rest of the lock array. */

			if (i < br_lck->num_locks - 1) {
				memcpy(&tp[count], &locks[i+1],
					sizeof(*locks)*((br_lck->num_locks-1) - i));
				count += ((br_lck->num_locks-1) - i);
			}
			break;
		}

	}

	if (!overlap_found) {
		/* Just ignore - no change. */
		SAFE_FREE(tp);
		DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
		return True;
	}

	/* Unlock any POSIX regions. */
	if(lp_posix_locking(br_lck->fsp->conn->params)) {
		release_posix_lock_posix_flavour(br_lck->fsp,
						plock->start,
						plock->size,
						&plock->context,
						tp,
						count);
	}

	/* Realloc so we don't leak entries per unlock call. */
	if (count) {
		/* NOTE(review): if this realloc fails we return False after
		   the system-level unlock above has already happened, leaving
		   br_lck->lock_data unchanged — presumably acceptable as a
		   rare OOM path; confirm SMB_REALLOC frees tp on failure. */
		tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
		if (!tp) {
			DEBUG(10,("brl_unlock_posix: realloc fail\n"));
			return False;
		}
	} else {
		/* We deleted the last lock. */
		SAFE_FREE(tp);
		tp = NULL;
	}

	contend_level2_oplocks_end(br_lck->fsp,
				   LEVEL2_CONTEND_POSIX_BRL);

	/* Install the rebuilt array; the old one is freed here. */
	br_lck->num_locks = count;
	SAFE_FREE(br_lck->lock_data);
	locks = tp;
	br_lck->lock_data = tp;
	br_lck->modified = True;

	/* Send unlock messages to any pending waiters that overlap. */

	for (j=0; j < br_lck->num_locks; j++) {
		struct lock_struct *pend_lock = &locks[j];

		/* Ignore non-pending locks. */
		if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
			continue;
		}

		/* We could send specific lock info here... */
		if (brl_pending_overlap(plock, pend_lock)) {
			DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
				procid_str_static(&pend_lock->context.pid )));

			messaging_send(msg_ctx, pend_lock->context.pid,
				       MSG_SMB_UNLOCK, &data_blob_null);
		}
	}

	return True;
}
1222
/*
 * VFS dispatcher for Windows-flavour byte-range unlocks.
 *
 * VFS_FIND(brl_unlock_windows) resolves the handle whose fns table provides
 * the brl_unlock_windows operation, then forwards the call unchanged.
 */
bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
				     struct messaging_context *msg_ctx,
				     struct byte_range_lock *br_lck,
				     const struct lock_struct *plock)
{
	VFS_FIND(brl_unlock_windows);
	return handle->fns->brl_unlock_windows(handle, msg_ctx, br_lck, plock);
}
1231
1232 /****************************************************************************
1233  Unlock a range of bytes.
1234 ****************************************************************************/
1235
1236 bool brl_unlock(struct messaging_context *msg_ctx,
1237                 struct byte_range_lock *br_lck,
1238                 uint64_t smblctx,
1239                 struct server_id pid,
1240                 br_off start,
1241                 br_off size,
1242                 enum brl_flavour lock_flav)
1243 {
1244         struct lock_struct lock;
1245
1246         lock.context.smblctx = smblctx;
1247         lock.context.pid = pid;
1248         lock.context.tid = br_lck->fsp->conn->cnum;
1249         lock.start = start;
1250         lock.size = size;
1251         lock.fnum = br_lck->fsp->fnum;
1252         lock.lock_type = UNLOCK_LOCK;
1253         lock.lock_flav = lock_flav;
1254
1255         if (lock_flav == WINDOWS_LOCK) {
1256                 return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
1257                     br_lck, &lock);
1258         } else {
1259                 return brl_unlock_posix(msg_ctx, br_lck, &lock);
1260         }
1261 }
1262
1263 /****************************************************************************
1264  Test if we could add a lock if we wanted to.
1265  Returns True if the region required is currently unlocked, False if locked.
1266 ****************************************************************************/
1267
1268 bool brl_locktest(struct byte_range_lock *br_lck,
1269                 uint64_t smblctx,
1270                 struct server_id pid,
1271                 br_off start,
1272                 br_off size, 
1273                 enum brl_type lock_type,
1274                 enum brl_flavour lock_flav)
1275 {
1276         bool ret = True;
1277         unsigned int i;
1278         struct lock_struct lock;
1279         const struct lock_struct *locks = br_lck->lock_data;
1280         files_struct *fsp = br_lck->fsp;
1281
1282         lock.context.smblctx = smblctx;
1283         lock.context.pid = pid;
1284         lock.context.tid = br_lck->fsp->conn->cnum;
1285         lock.start = start;
1286         lock.size = size;
1287         lock.fnum = fsp->fnum;
1288         lock.lock_type = lock_type;
1289         lock.lock_flav = lock_flav;
1290
1291         /* Make sure existing locks don't conflict */
1292         for (i=0; i < br_lck->num_locks; i++) {
1293                 /*
1294                  * Our own locks don't conflict.
1295                  */
1296                 if (brl_conflict_other(&locks[i], &lock)) {
1297                         return False;
1298                 }
1299         }
1300
1301         /*
1302          * There is no lock held by an SMB daemon, check to
1303          * see if there is a POSIX lock from a UNIX or NFS process.
1304          * This only conflicts with Windows locks, not POSIX locks.
1305          */
1306
1307         if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) {
1308                 ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
1309
1310                 DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1311                         (double)start, (double)size, ret ? "locked" : "unlocked",
1312                         fsp->fnum, fsp_str_dbg(fsp)));
1313
1314                 /* We need to return the inverse of is_posix_locked. */
1315                 ret = !ret;
1316         }
1317
1318         /* no conflicts - we could have added it */
1319         return ret;
1320 }
1321
1322 /****************************************************************************
1323  Query for existing locks.
1324 ****************************************************************************/
1325
1326 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1327                 uint64_t *psmblctx,
1328                 struct server_id pid,
1329                 br_off *pstart,
1330                 br_off *psize, 
1331                 enum brl_type *plock_type,
1332                 enum brl_flavour lock_flav)
1333 {
1334         unsigned int i;
1335         struct lock_struct lock;
1336         const struct lock_struct *locks = br_lck->lock_data;
1337         files_struct *fsp = br_lck->fsp;
1338
1339         lock.context.smblctx = *psmblctx;
1340         lock.context.pid = pid;
1341         lock.context.tid = br_lck->fsp->conn->cnum;
1342         lock.start = *pstart;
1343         lock.size = *psize;
1344         lock.fnum = fsp->fnum;
1345         lock.lock_type = *plock_type;
1346         lock.lock_flav = lock_flav;
1347
1348         /* Make sure existing locks don't conflict */
1349         for (i=0; i < br_lck->num_locks; i++) {
1350                 const struct lock_struct *exlock = &locks[i];
1351                 bool conflict = False;
1352
1353                 if (exlock->lock_flav == WINDOWS_LOCK) {
1354                         conflict = brl_conflict(exlock, &lock);
1355                 } else {        
1356                         conflict = brl_conflict_posix(exlock, &lock);
1357                 }
1358
1359                 if (conflict) {
1360                         *psmblctx = exlock->context.smblctx;
1361                         *pstart = exlock->start;
1362                         *psize = exlock->size;
1363                         *plock_type = exlock->lock_type;
1364                         return NT_STATUS_LOCK_NOT_GRANTED;
1365                 }
1366         }
1367
1368         /*
1369          * There is no lock held by an SMB daemon, check to
1370          * see if there is a POSIX lock from a UNIX or NFS process.
1371          */
1372
1373         if(lp_posix_locking(fsp->conn->params)) {
1374                 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1375
1376                 DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n",
1377                         (double)*pstart, (double)*psize, ret ? "locked" : "unlocked",
1378                         fsp->fnum, fsp_str_dbg(fsp)));
1379
1380                 if (ret) {
1381                         /* Hmmm. No clue what to set smblctx to - use -1. */
1382                         *psmblctx = 0xFFFFFFFFFFFFFFFFLL;
1383                         return NT_STATUS_LOCK_NOT_GRANTED;
1384                 }
1385         }
1386
1387         return NT_STATUS_OK;
1388 }
1389
1390
/*
 * VFS dispatcher for cancelling a pending Windows-flavour lock.
 *
 * VFS_FIND(brl_cancel_windows) resolves the handle whose fns table provides
 * the brl_cancel_windows operation, then forwards the call unchanged.
 */
bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
				     struct byte_range_lock *br_lck,
				     struct lock_struct *plock,
				     struct blocking_lock_record *blr)
{
	VFS_FIND(brl_cancel_windows);
	return handle->fns->brl_cancel_windows(handle, br_lck, plock, blr);
}
1399
1400 /****************************************************************************
1401  Remove a particular pending lock.
1402 ****************************************************************************/
1403 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1404                 uint64_t smblctx,
1405                 struct server_id pid,
1406                 br_off start,
1407                 br_off size,
1408                 enum brl_flavour lock_flav,
1409                 struct blocking_lock_record *blr)
1410 {
1411         bool ret;
1412         struct lock_struct lock;
1413
1414         lock.context.smblctx = smblctx;
1415         lock.context.pid = pid;
1416         lock.context.tid = br_lck->fsp->conn->cnum;
1417         lock.start = start;
1418         lock.size = size;
1419         lock.fnum = br_lck->fsp->fnum;
1420         lock.lock_flav = lock_flav;
1421         /* lock.lock_type doesn't matter */
1422
1423         if (lock_flav == WINDOWS_LOCK) {
1424                 ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
1425                     &lock, blr);
1426         } else {
1427                 ret = brl_lock_cancel_default(br_lck, &lock);
1428         }
1429
1430         return ret;
1431 }
1432
1433 bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
1434                 struct lock_struct *plock)
1435 {
1436         unsigned int i;
1437         struct lock_struct *locks = br_lck->lock_data;
1438
1439         SMB_ASSERT(plock);
1440
1441         for (i = 0; i < br_lck->num_locks; i++) {
1442                 struct lock_struct *lock = &locks[i];
1443
1444                 /* For pending locks we *always* care about the fnum. */
1445                 if (brl_same_context(&lock->context, &plock->context) &&
1446                                 lock->fnum == plock->fnum &&
1447                                 IS_PENDING_LOCK(lock->lock_type) &&
1448                                 lock->lock_flav == plock->lock_flav &&
1449                                 lock->start == plock->start &&
1450                                 lock->size == plock->size) {
1451                         break;
1452                 }
1453         }
1454
1455         if (i == br_lck->num_locks) {
1456                 /* Didn't find it. */
1457                 return False;
1458         }
1459
1460         if (i < br_lck->num_locks - 1) {
1461                 /* Found this particular pending lock - delete it */
1462                 memmove(&locks[i], &locks[i+1], 
1463                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1464         }
1465
1466         br_lck->num_locks -= 1;
1467         br_lck->modified = True;
1468         return True;
1469 }
1470
1471 /****************************************************************************
1472  Remove any locks associated with a open file.
1473  We return True if this process owns any other Windows locks on this
1474  fd and so we should not immediately close the fd.
1475 ****************************************************************************/
1476
1477 void brl_close_fnum(struct messaging_context *msg_ctx,
1478                     struct byte_range_lock *br_lck)
1479 {
1480         files_struct *fsp = br_lck->fsp;
1481         uint16 tid = fsp->conn->cnum;
1482         int fnum = fsp->fnum;
1483         unsigned int i, j, dcount=0;
1484         int num_deleted_windows_locks = 0;
1485         struct lock_struct *locks = br_lck->lock_data;
1486         struct server_id pid = procid_self();
1487         bool unlock_individually = False;
1488         bool posix_level2_contention_ended = false;
1489
1490         if(lp_posix_locking(fsp->conn->params)) {
1491
1492                 /* Check if there are any Windows locks associated with this dev/ino
1493                    pair that are not this fnum. If so we need to call unlock on each
1494                    one in order to release the system POSIX locks correctly. */
1495
1496                 for (i=0; i < br_lck->num_locks; i++) {
1497                         struct lock_struct *lock = &locks[i];
1498
1499                         if (!procid_equal(&lock->context.pid, &pid)) {
1500                                 continue;
1501                         }
1502
1503                         if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
1504                                 continue; /* Ignore pending. */
1505                         }
1506
1507                         if (lock->context.tid != tid || lock->fnum != fnum) {
1508                                 unlock_individually = True;
1509                                 break;
1510                         }
1511                 }
1512
1513                 if (unlock_individually) {
1514                         struct lock_struct *locks_copy;
1515                         unsigned int num_locks_copy;
1516
1517                         /* Copy the current lock array. */
1518                         if (br_lck->num_locks) {
1519                                 locks_copy = (struct lock_struct *)TALLOC_MEMDUP(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1520                                 if (!locks_copy) {
1521                                         smb_panic("brl_close_fnum: talloc failed");
1522                                 }
1523                         } else {        
1524                                 locks_copy = NULL;
1525                         }
1526
1527                         num_locks_copy = br_lck->num_locks;
1528
1529                         for (i=0; i < num_locks_copy; i++) {
1530                                 struct lock_struct *lock = &locks_copy[i];
1531
1532                                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) &&
1533                                                 (lock->fnum == fnum)) {
1534                                         brl_unlock(msg_ctx,
1535                                                 br_lck,
1536                                                 lock->context.smblctx,
1537                                                 pid,
1538                                                 lock->start,
1539                                                 lock->size,
1540                                                 lock->lock_flav);
1541                                 }
1542                         }
1543                         return;
1544                 }
1545         }
1546
1547         /* We can bulk delete - any POSIX locks will be removed when the fd closes. */
1548
1549         /* Remove any existing locks for this fnum (or any fnum if they're POSIX). */
1550
1551         for (i=0; i < br_lck->num_locks; i++) {
1552                 struct lock_struct *lock = &locks[i];
1553                 bool del_this_lock = False;
1554
1555                 if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) {
1556                         if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
1557                                 del_this_lock = True;
1558                                 num_deleted_windows_locks++;
1559                                 contend_level2_oplocks_end(br_lck->fsp,
1560                                     LEVEL2_CONTEND_WINDOWS_BRL);
1561                         } else if (lock->lock_flav == POSIX_LOCK) {
1562                                 del_this_lock = True;
1563
1564                                 /* Only end level2 contention once for posix */
1565                                 if (!posix_level2_contention_ended) {
1566                                         posix_level2_contention_ended = true;
1567                                         contend_level2_oplocks_end(br_lck->fsp,
1568                                             LEVEL2_CONTEND_POSIX_BRL);
1569                                 }
1570                         }
1571                 }
1572
1573                 if (del_this_lock) {
1574                         /* Send unlock messages to any pending waiters that overlap. */
1575                         for (j=0; j < br_lck->num_locks; j++) {
1576                                 struct lock_struct *pend_lock = &locks[j];
1577
1578                                 /* Ignore our own or non-pending locks. */
1579                                 if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
1580                                         continue;
1581                                 }
1582
1583                                 /* Optimisation - don't send to this fnum as we're
1584                                    closing it. */
1585                                 if (pend_lock->context.tid == tid &&
1586                                     procid_equal(&pend_lock->context.pid, &pid) &&
1587                                     pend_lock->fnum == fnum) {
1588                                         continue;
1589                                 }
1590
1591                                 /* We could send specific lock info here... */
1592                                 if (brl_pending_overlap(lock, pend_lock)) {
1593                                         messaging_send(msg_ctx, pend_lock->context.pid,
1594                                                        MSG_SMB_UNLOCK, &data_blob_null);
1595                                 }
1596                         }
1597
1598                         /* found it - delete it */
1599                         if (br_lck->num_locks > 1 && i < br_lck->num_locks - 1) {
1600                                 memmove(&locks[i], &locks[i+1], 
1601                                         sizeof(*locks)*((br_lck->num_locks-1) - i));
1602                         }
1603                         br_lck->num_locks--;
1604                         br_lck->modified = True;
1605                         i--;
1606                         dcount++;
1607                 }
1608         }
1609
1610         if(lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) {
1611                 /* Reduce the Windows lock POSIX reference count on this dev/ino pair. */
1612                 reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks);
1613         }
1614 }
1615
1616 /****************************************************************************
1617  Ensure this set of lock entries is valid.
1618 ****************************************************************************/
1619 static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks)
1620 {
1621         unsigned int i;
1622         unsigned int num_valid_entries = 0;
1623         struct lock_struct *locks = *pplocks;
1624
1625         for (i = 0; i < *pnum_entries; i++) {
1626                 struct lock_struct *lock_data = &locks[i];
1627                 if (!serverid_exists(&lock_data->context.pid)) {
1628                         /* This process no longer exists - mark this
1629                            entry as invalid by zeroing it. */
1630                         ZERO_STRUCTP(lock_data);
1631                 } else {
1632                         num_valid_entries++;
1633                 }
1634         }
1635
1636         if (num_valid_entries != *pnum_entries) {
1637                 struct lock_struct *new_lock_data = NULL;
1638
1639                 if (num_valid_entries) {
1640                         new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries);
1641                         if (!new_lock_data) {
1642                                 DEBUG(3, ("malloc fail\n"));
1643                                 return False;
1644                         }
1645
1646                         num_valid_entries = 0;
1647                         for (i = 0; i < *pnum_entries; i++) {
1648                                 struct lock_struct *lock_data = &locks[i];
1649                                 if (lock_data->context.smblctx &&
1650                                                 lock_data->context.tid) {
1651                                         /* Valid (nonzero) entry - copy it. */
1652                                         memcpy(&new_lock_data[num_valid_entries],
1653                                                 lock_data, sizeof(struct lock_struct));
1654                                         num_valid_entries++;
1655                                 }
1656                         }
1657                 }
1658
1659                 SAFE_FREE(*pplocks);
1660                 *pplocks = new_lock_data;
1661                 *pnum_entries = num_valid_entries;
1662         }
1663
1664         return True;
1665 }
1666
/* Callback state threaded through the database traverse in brl_forall(). */
struct brl_forall_cb {
	/* Invoked once per lock entry found during the traverse. */
	void (*fn)(struct file_id id, struct server_id pid,
		   enum brl_type lock_type,
		   enum brl_flavour lock_flav,
		   br_off start, br_off size,
		   void *private_data);
	void *private_data;	/* opaque pointer handed back to fn unchanged */
};
1675
1676 /****************************************************************************
1677  Traverse the whole database with this function, calling traverse_callback
1678  on each lock.
1679 ****************************************************************************/
1680
1681 static int traverse_fn(struct db_record *rec, void *state)
1682 {
1683         struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1684         struct lock_struct *locks;
1685         struct file_id *key;
1686         unsigned int i;
1687         unsigned int num_locks = 0;
1688         unsigned int orig_num_locks = 0;
1689
1690         /* In a traverse function we must make a copy of
1691            dbuf before modifying it. */
1692
1693         locks = (struct lock_struct *)memdup(rec->value.dptr,
1694                                              rec->value.dsize);
1695         if (!locks) {
1696                 return -1; /* Terminate traversal. */
1697         }
1698
1699         key = (struct file_id *)rec->key.dptr;
1700         orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks);
1701
1702         /* Ensure the lock db is clean of entries from invalid processes. */
1703
1704         if (!validate_lock_entries(&num_locks, &locks)) {
1705                 SAFE_FREE(locks);
1706                 return -1; /* Terminate traversal */
1707         }
1708
1709         if (orig_num_locks != num_locks) {
1710                 if (num_locks) {
1711                         TDB_DATA data;
1712                         data.dptr = (uint8_t *)locks;
1713                         data.dsize = num_locks*sizeof(struct lock_struct);
1714                         rec->store(rec, data, TDB_REPLACE);
1715                 } else {
1716                         rec->delete_rec(rec);
1717                 }
1718         }
1719
1720         if (cb->fn) {
1721                 for ( i=0; i<num_locks; i++) {
1722                         cb->fn(*key,
1723                                 locks[i].context.pid,
1724                                 locks[i].lock_type,
1725                                 locks[i].lock_flav,
1726                                 locks[i].start,
1727                                 locks[i].size,
1728                                 cb->private_data);
1729                 }
1730         }
1731
1732         SAFE_FREE(locks);
1733         return 0;
1734 }
1735
1736 /*******************************************************************
1737  Call the specified function on each lock in the database.
1738 ********************************************************************/
1739
1740 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1741                           enum brl_type lock_type,
1742                           enum brl_flavour lock_flav,
1743                           br_off start, br_off size,
1744                           void *private_data),
1745                void *private_data)
1746 {
1747         struct brl_forall_cb cb;
1748
1749         if (!brlock_db) {
1750                 return 0;
1751         }
1752         cb.fn = fn;
1753         cb.private_data = private_data;
1754         return brlock_db->traverse(brlock_db, traverse_fn, &cb);
1755 }
1756
1757 /*******************************************************************
1758  Store a potentially modified set of byte range lock data back into
1759  the database.
1760  Unlock the record.
1761 ********************************************************************/
1762
1763 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1764 {
1765         if (br_lck->read_only) {
1766                 SMB_ASSERT(!br_lck->modified);
1767         }
1768
1769         if (!br_lck->modified) {
1770                 goto done;
1771         }
1772
1773         if (br_lck->num_locks == 0) {
1774                 /* No locks - delete this entry. */
1775                 NTSTATUS status = br_lck->record->delete_rec(br_lck->record);
1776                 if (!NT_STATUS_IS_OK(status)) {
1777                         DEBUG(0, ("delete_rec returned %s\n",
1778                                   nt_errstr(status)));
1779                         smb_panic("Could not delete byte range lock entry");
1780                 }
1781         } else {
1782                 TDB_DATA data;
1783                 NTSTATUS status;
1784
1785                 data.dptr = (uint8 *)br_lck->lock_data;
1786                 data.dsize = br_lck->num_locks * sizeof(struct lock_struct);
1787
1788                 status = br_lck->record->store(br_lck->record, data,
1789                                                TDB_REPLACE);
1790                 if (!NT_STATUS_IS_OK(status)) {
1791                         DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1792                         smb_panic("Could not store byte range mode entry");
1793                 }
1794         }
1795
1796  done:
1797
1798         SAFE_FREE(br_lck->lock_data);
1799         TALLOC_FREE(br_lck->record);
1800         return 0;
1801 }
1802
/*******************************************************************
 Fetch a set of byte range lock data from the database.
 Leave the record locked.
 TALLOC_FREE(brl) will release the lock in the destructor.
 Returns NULL on allocation failure or if the record cannot be
 fetched/locked.
********************************************************************/

static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx,
					files_struct *fsp, bool read_only)
{
	TDB_DATA key, data;
	struct byte_range_lock *br_lck = TALLOC_P(mem_ctx, struct byte_range_lock);

	if (br_lck == NULL) {
		return NULL;
	}

	br_lck->fsp = fsp;
	br_lck->num_locks = 0;
	br_lck->modified = False;
	br_lck->key = fsp->file_id;

	/* The file_id is the database key for this record. */
	key.dptr = (uint8 *)&br_lck->key;
	key.dsize = sizeof(struct file_id);

	if (!fsp->lockdb_clean) {
		/* We must be read/write to clean
		   the dead entries. */
		read_only = False;
	}

	if (read_only) {
		/* Read-only path: copy the data out without taking the
		   record lock. */
		if (brlock_db->fetch(brlock_db, br_lck, key, &data) == -1) {
			DEBUG(3, ("Could not fetch byte range lock record\n"));
			TALLOC_FREE(br_lck);
			return NULL;
		}
		br_lck->record = NULL;
	}
	else {
		/* Read-write path: lock the record; the destructor
		   releases it via TALLOC_FREE(br_lck->record). */
		br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key);

		if (br_lck->record == NULL) {
			DEBUG(3, ("Could not lock byte range lock entry\n"));
			TALLOC_FREE(br_lck);
			return NULL;
		}

		data = br_lck->record->value;
	}

	br_lck->read_only = read_only;
	/* lock_data must be NULL before the destructor is installed so an
	   early TALLOC_FREE(br_lck) below never frees garbage. */
	br_lck->lock_data = NULL;

	talloc_set_destructor(br_lck, byte_range_lock_destructor);

	br_lck->num_locks = data.dsize / sizeof(struct lock_struct);

	if (br_lck->num_locks != 0) {
		br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct,
						     br_lck->num_locks);
		if (br_lck->lock_data == NULL) {
			DEBUG(0, ("malloc failed\n"));
			TALLOC_FREE(br_lck);
			return NULL;
		}

		memcpy(br_lck->lock_data, data.dptr, data.dsize);
	}
	
	if (!fsp->lockdb_clean) {
		int orig_num_locks = br_lck->num_locks;

		/* This is the first time we've accessed this. */
		/* Go through and ensure all entries exist - remove any that don't. */
		/* Makes the lockdb self cleaning at low cost. */

		if (!validate_lock_entries(&br_lck->num_locks,
					   &br_lck->lock_data)) {
			/* SAFE_FREE NULLs lock_data, so the destructor's
			   own SAFE_FREE is a harmless no-op afterwards. */
			SAFE_FREE(br_lck->lock_data);
			TALLOC_FREE(br_lck);
			return NULL;
		}

		/* Ensure invalid locks are cleaned up in the destructor. */
		if (orig_num_locks != br_lck->num_locks) {
			br_lck->modified = True;
		}

		/* Mark the lockdb as "clean" as seen from this open file. */
		fsp->lockdb_clean = True;
	}

	if (DEBUGLEVEL >= 10) {
		unsigned int i;
		struct lock_struct *locks = br_lck->lock_data;
		DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n",
			br_lck->num_locks,
			  file_id_string_tos(&fsp->file_id)));
		for( i = 0; i < br_lck->num_locks; i++) {
			print_lock_struct(i, &locks[i]);
		}
	}
	return br_lck;
}
1907
1908 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx,
1909                                         files_struct *fsp)
1910 {
1911         return brl_get_locks_internal(mem_ctx, fsp, False);
1912 }
1913
struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
{
	struct byte_range_lock *br_lock;

	if (lp_clustering()) {
		/* The seqnum-based cache below is not valid across a
		   cluster, so always fetch fresh from the database. */
		return brl_get_locks_internal(talloc_tos(), fsp, true);
	}

	/* Cache hit: reuse the per-fsp copy if the database has not
	   changed since it was cached (seqnum unchanged). */
	if ((fsp->brlock_rec != NULL)
	    && (brlock_db->get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
		return fsp->brlock_rec;
	}

	TALLOC_FREE(fsp->brlock_rec);

	/* Fetch read-write (false) so dead entries get cleaned on the
	   first access; the record lock is dropped below. */
	br_lock = brl_get_locks_internal(talloc_tos(), fsp, false);
	if (br_lock == NULL) {
		return NULL;
	}
	fsp->brlock_seqnum = brlock_db->get_seqnum(brlock_db);

	/* Build the cached, read-only snapshot owned by the fsp. */
	fsp->brlock_rec = talloc_zero(fsp, struct byte_range_lock);
	if (fsp->brlock_rec == NULL) {
		goto fail;
	}
	fsp->brlock_rec->fsp = fsp;
	fsp->brlock_rec->num_locks = br_lock->num_locks;
	fsp->brlock_rec->read_only = true;
	fsp->brlock_rec->key = br_lock->key;

	/* NOTE(review): when num_locks == 0, lock_data is NULL and this
	   talloc_memdup(ctx, NULL, 0) may return NULL, taking the fail
	   path on what is really a success - verify talloc_memdup's
	   zero-size semantics. */
	fsp->brlock_rec->lock_data = (struct lock_struct *)
		talloc_memdup(fsp->brlock_rec, br_lock->lock_data,
			      sizeof(struct lock_struct) * br_lock->num_locks);
	if (fsp->brlock_rec->lock_data == NULL) {
		goto fail;
	}

	/* Freeing br_lock releases the record lock (destructor). */
	TALLOC_FREE(br_lock);
	return fsp->brlock_rec;
fail:
	TALLOC_FREE(br_lock);
	TALLOC_FREE(fsp->brlock_rec);
	return NULL;
}
1958
/* Accumulator for brl_revalidate_collect(): grows an array of the
   server ids that own pending lock entries. */
struct brl_revalidate_state {
	ssize_t array_size;		/* capacity bookkeeping for add_to_large_array;
					   -1 signals allocation failure (checked in
					   brl_revalidate) */
	uint32 num_pids;		/* number of entries collected so far */
	struct server_id *pids;		/* collected owner ids (may contain duplicates) */
};
1964
/*
 * Collect PIDs of all processes with pending entries
 * (brl_forall callback; appends each pending lock's owner to
 * the brl_revalidate_state array).
 */

static void brl_revalidate_collect(struct file_id id, struct server_id pid,
                                   enum brl_type lock_type,
                                   enum brl_flavour lock_flav,
                                   br_off start, br_off size,
                                   void *private_data)
{
	struct brl_revalidate_state *state =
		(struct brl_revalidate_state *)private_data;

	/* Only pending (blocked) lock requests need a retry message. */
	if (!IS_PENDING_LOCK(lock_type)) {
		return;
	}

	/* Append the owner's server_id; presumably add_to_large_array
	   sets state->array_size to -1 on allocation failure, which the
	   caller checks after the traverse - verify against its impl. */
	add_to_large_array(state, sizeof(pid), (void *)&pid,
			   &state->pids, &state->num_pids,
			   &state->array_size);
}
1986
1987 /*
1988  * qsort callback to sort the processes
1989  */
1990
1991 static int compare_procids(const void *p1, const void *p2)
1992 {
1993         const struct server_id *i1 = (struct server_id *)p1;
1994         const struct server_id *i2 = (struct server_id *)p2;
1995
1996         if (i1->pid < i2->pid) return -1;
1997         if (i2->pid > i2->pid) return 1;
1998         return 0;
1999 }
2000
2001 /*
2002  * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
2003  * locks so that they retry. Mainly used in the cluster code after a node has
2004  * died.
2005  *
2006  * Done in two steps to avoid double-sends: First we collect all entries in an
2007  * array, then qsort that array and only send to non-dupes.
2008  */
2009
2010 static void brl_revalidate(struct messaging_context *msg_ctx,
2011                            void *private_data,
2012                            uint32_t msg_type,
2013                            struct server_id server_id,
2014                            DATA_BLOB *data)
2015 {
2016         struct brl_revalidate_state *state;
2017         uint32 i;
2018         struct server_id last_pid;
2019
2020         if (!(state = TALLOC_ZERO_P(NULL, struct brl_revalidate_state))) {
2021                 DEBUG(0, ("talloc failed\n"));
2022                 return;
2023         }
2024
2025         brl_forall(brl_revalidate_collect, state);
2026
2027         if (state->array_size == -1) {
2028                 DEBUG(0, ("talloc failed\n"));
2029                 goto done;
2030         }
2031
2032         if (state->num_pids == 0) {
2033                 goto done;
2034         }
2035
2036         TYPESAFE_QSORT(state->pids, state->num_pids, compare_procids);
2037
2038         ZERO_STRUCT(last_pid);
2039
2040         for (i=0; i<state->num_pids; i++) {
2041                 if (procid_equal(&last_pid, &state->pids[i])) {
2042                         /*
2043                          * We've seen that one already
2044                          */
2045                         continue;
2046                 }
2047
2048                 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
2049                                &data_blob_null);
2050                 last_pid = state->pids[i];
2051         }
2052
2053  done:
2054         TALLOC_FREE(state);
2055         return;
2056 }
2057
/* Hook brl_revalidate() up to the MSG_SMB_BRL_VALIDATE message so
   pending byte-range lock waiters can be re-kicked on demand. */
void brl_register_msgs(struct messaging_context *msg_ctx)
{
	messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE,
			   brl_revalidate);
}