brlock: Remove validate_lock_entries
source3/locking/brlock.c
/*
   Unix SMB/CIFS implementation.
   byte range locking code
   Updated to handle range splits/merges.

   Copyright (C) Andrew Tridgell 1992-2000
   Copyright (C) Jeremy Allison 1992-2000

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

/* This module implements a tdb based byte range locking service,
   replacing the fcntl() based byte range locking previously
   used. This allows us to provide the same semantics as NT */

#include "includes.h"
#include "system/filesys.h"
#include "locking/proto.h"
#include "smbd/globals.h"
#include "dbwrap/dbwrap.h"
#include "dbwrap/dbwrap_open.h"
#include "serverid.h"
#include "messages.h"
#include "util_tdb.h"

#undef DBGC_CLASS
#define DBGC_CLASS DBGC_LOCKING

#define ZERO_ZERO 0

/* The open brlock.tdb database. */

static struct db_context *brlock_db;

struct byte_range_lock {
        struct files_struct *fsp;
        unsigned int num_locks;
        bool modified;
        bool have_read_oplocks;
        struct lock_struct *lock_data;
        struct db_record *record;
};
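
/*
 * Note: brl->record is NULL when the lock data was fetched read-only.
 * Paths that change state either assert record != NULL (see
 * brl_set_have_read_oplocks() below) or bail out early
 * (see brl_locktest()).
 */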
54
55 /****************************************************************************
56  Debug info at level 10 for lock struct.
57 ****************************************************************************/
58
59 static void print_lock_struct(unsigned int i, const struct lock_struct *pls)
60 {
61         DEBUG(10,("[%u]: smblctx = %llu, tid = %u, pid = %s, ",
62                         i,
63                         (unsigned long long)pls->context.smblctx,
64                         (unsigned int)pls->context.tid,
65                         server_id_str(talloc_tos(), &pls->context.pid) ));
66
67         DEBUG(10, ("start = %ju, size = %ju, fnum = %ju, %s %s\n",
68                    (uintmax_t)pls->start,
69                    (uintmax_t)pls->size,
70                    (uintmax_t)pls->fnum,
71                    lock_type_name(pls->lock_type),
72                    lock_flav_name(pls->lock_flav)));
73 }
74
75 unsigned int brl_num_locks(const struct byte_range_lock *brl)
76 {
77         return brl->num_locks;
78 }
79
80 struct files_struct *brl_fsp(struct byte_range_lock *brl)
81 {
82         return brl->fsp;
83 }
84
85 bool brl_have_read_oplocks(const struct byte_range_lock *brl)
86 {
87         return brl->have_read_oplocks;
88 }
89
90 void brl_set_have_read_oplocks(struct byte_range_lock *brl,
91                                bool have_read_oplocks)
92 {
93         DEBUG(10, ("Setting have_read_oplocks to %s\n",
94                    have_read_oplocks ? "true" : "false"));
95         SMB_ASSERT(brl->record != NULL); /* otherwise we're readonly */
96         brl->have_read_oplocks = have_read_oplocks;
97         brl->modified = true;
98 }
99
100 /****************************************************************************
101  See if two locking contexts are equal.
102 ****************************************************************************/
103
104 static bool brl_same_context(const struct lock_context *ctx1,
105                              const struct lock_context *ctx2)
106 {
107         return (serverid_equal(&ctx1->pid, &ctx2->pid) &&
108                 (ctx1->smblctx == ctx2->smblctx) &&
109                 (ctx1->tid == ctx2->tid));
110 }
111
112 /****************************************************************************
113  See if lck1 and lck2 overlap.
114 ****************************************************************************/
115
116 static bool brl_overlap(const struct lock_struct *lck1,
117                         const struct lock_struct *lck2)
118 {
119         /* XXX Remove for Win7 compatibility. */
120         /* this extra check is not redundant - it copes with locks
121            that go beyond the end of 64 bit file space */
122         if (lck1->size != 0 &&
123             lck1->start == lck2->start &&
124             lck1->size == lck2->size) {
125                 return True;
126         }
127
128         if (lck1->start >= (lck2->start+lck2->size) ||
129             lck2->start >= (lck1->start+lck1->size)) {
130                 return False;
131         }
132         return True;
133 }
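
/*
 * For illustration: ranges behave as half-open intervals
 * [start, start + size), so start=100/size=10 covers bytes 100..109 and
 * overlaps a probe at start=109/size=1, but not one at start=110/size=1.
 * The equal start/size fast path above also catches ranges whose
 * start + size has wrapped past the top of 64-bit file space, which the
 * arithmetic test below would wrongly report as non-overlapping.
 */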

/****************************************************************************
 See if lock2 can be added when lock1 is in place.
****************************************************************************/

static bool brl_conflict(const struct lock_struct *lck1,
                         const struct lock_struct *lck2)
{
        /* Ignore PENDING locks. */
        if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;

        /* Read locks never conflict. */
        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
        }

        /* A READ lock can stack on top of a WRITE lock if they have the same
         * context & fnum. */
        if (lck1->lock_type == WRITE_LOCK && lck2->lock_type == READ_LOCK &&
            brl_same_context(&lck1->context, &lck2->context) &&
            lck1->fnum == lck2->fnum) {
                return False;
        }

        return brl_overlap(lck1, lck2);
}
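
/*
 * Example of the stacking rule above: a context holding a WRITE lock on
 * [100, 110) through fnum 5 may add a READ lock on the same range via
 * fnum 5 without conflict, but the same request through fnum 6, or from
 * another context, conflicts.
 */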

/****************************************************************************
 See if lock2 can be added when lock1 is in place - when both locks are POSIX
 flavour. POSIX locks ignore fnum - they only care about dev/ino which we
 know already match.
****************************************************************************/

static bool brl_conflict_posix(const struct lock_struct *lck1,
                                const struct lock_struct *lck2)
{
#if defined(DEVELOPER)
        SMB_ASSERT(lck1->lock_flav == POSIX_LOCK);
        SMB_ASSERT(lck2->lock_flav == POSIX_LOCK);
#endif

        /* Ignore PENDING locks. */
        if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;

        /* Read locks never conflict. */
        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
        }

        /* Locks on the same context don't conflict. Ignore fnum. */
        if (brl_same_context(&lck1->context, &lck2->context)) {
                return False;
        }

        /* One is read, the other write, or the context is different,
           do they overlap ? */
        return brl_overlap(lck1, lck2);
}

#if ZERO_ZERO
static bool brl_conflict1(const struct lock_struct *lck1,
                         const struct lock_struct *lck2)
{
        if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type))
                return False;

        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
        }

        if (brl_same_context(&lck1->context, &lck2->context) &&
            lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
                return False;
        }

        if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
                return True;
        }

        if (lck1->start >= (lck2->start + lck2->size) ||
            lck2->start >= (lck1->start + lck1->size)) {
                return False;
        }

        return True;
}
#endif

/****************************************************************************
 Check to see if this lock conflicts, but ignore our own locks on the
 same fnum only. This is the read/write lock check code path.
 This is never used in the POSIX lock case.
****************************************************************************/

static bool brl_conflict_other(const struct lock_struct *lock,
                               const struct lock_struct *rw_probe)
{
        if (IS_PENDING_LOCK(lock->lock_type) ||
            IS_PENDING_LOCK(rw_probe->lock_type)) {
                return False;
        }

        if (lock->lock_type == READ_LOCK && rw_probe->lock_type == READ_LOCK) {
                return False;
        }

        if (lock->lock_flav == POSIX_LOCK &&
            rw_probe->lock_flav == POSIX_LOCK) {
                /*
                 * POSIX flavour locks never conflict here - this is only called
                 * in the read/write path.
                 */
                return False;
        }

        if (!brl_overlap(lock, rw_probe)) {
                /*
                 * I/O can only conflict when overlapping a lock, thus let it
                 * pass
                 */
                return false;
        }

        if (!brl_same_context(&lock->context, &rw_probe->context)) {
                /*
                 * Different process, conflict
                 */
                return true;
        }

        if (lock->fnum != rw_probe->fnum) {
                /*
                 * Different file handle, conflict
                 */
                return true;
        }

        if ((lock->lock_type == READ_LOCK) &&
            (rw_probe->lock_type == WRITE_LOCK)) {
                /*
                 * Incoming WRITE locks conflict with existing READ locks even
                 * if the context is the same. JRA. See LOCKTEST7 in
                 * smbtorture.
                 */
                return true;
        }

        /*
         * I/O request compatible with existing lock, let it pass without
         * conflict
         */

        return false;
}

/****************************************************************************
 Check if an unlock overlaps a pending lock.
****************************************************************************/

static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock)
{
        if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
                return True;
        if ((lock->start >= pend_lock->start) && (lock->start < pend_lock->start + pend_lock->size))
                return True;
        return False;
}
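
/*
 * For illustration: brl_pending_overlap() returns True when either
 * range's start falls within the other, e.g. an unlock of [100, 110)
 * overlaps a pending lock starting at 105, and an unlock starting at
 * 105 overlaps a pending lock covering [100, 110).
 */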

/****************************************************************************
 Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum
 is the same as this one and changes its error code. I wonder if any
 app depends on this ?
****************************************************************************/

static NTSTATUS brl_lock_failed(files_struct *fsp,
                                const struct lock_struct *lock,
                                bool blocking_lock)
{
        if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) {
                /* amazing the little things you learn with a test
                   suite. Locks beyond this offset (as a 64 bit
                   number!) always generate the conflict error code,
                   unless the top bit is set */
                if (!blocking_lock) {
                        fsp->last_lock_failure = *lock;
                }
                return NT_STATUS_FILE_LOCK_CONFLICT;
        }

        if (serverid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) &&
                        lock->context.tid == fsp->last_lock_failure.context.tid &&
                        lock->fnum == fsp->last_lock_failure.fnum &&
                        lock->start == fsp->last_lock_failure.start) {
                return NT_STATUS_FILE_LOCK_CONFLICT;
        }

        if (!blocking_lock) {
                fsp->last_lock_failure = *lock;
        }
        return NT_STATUS_LOCK_NOT_GRANTED;
}
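
/*
 * In other words: the first non-blocking failure on a given
 * (context, fnum, start) returns NT_STATUS_LOCK_NOT_GRANTED and is
 * remembered; an immediate retry of the same failed lock then returns
 * NT_STATUS_FILE_LOCK_CONFLICT - the w2k3 behaviour described above.
 */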

/****************************************************************************
 Open up the brlock.tdb database.
****************************************************************************/

void brl_init(bool read_only)
{
        int tdb_flags;

        if (brlock_db) {
                return;
        }

        tdb_flags = TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST|TDB_INCOMPATIBLE_HASH;

        if (!lp_clustering()) {
                /*
                 * We can't use the SEQNUM trick to cache brlock
                 * entries in the clustering case because ctdb seqnum
                 * propagation has a delay.
                 */
                tdb_flags |= TDB_SEQNUM;
        }

        brlock_db = db_open(NULL, lock_path("brlock.tdb"),
                            SMB_OPEN_DATABASE_TDB_HASH_SIZE, tdb_flags,
                            read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644,
                            DBWRAP_LOCK_ORDER_2, DBWRAP_FLAG_NONE);
        if (!brlock_db) {
                DEBUG(0,("Failed to open byte range locking database %s\n",
                        lock_path("brlock.tdb")));
                return;
        }
}

/****************************************************************************
 Close down the brlock.tdb database.
****************************************************************************/

void brl_shutdown(void)
{
        TALLOC_FREE(brlock_db);
}

#if ZERO_ZERO
/****************************************************************************
 Compare two locks for sorting.
****************************************************************************/

static int lock_compare(const struct lock_struct *lck1,
                         const struct lock_struct *lck2)
{
        if (lck1->start != lck2->start) {
                return (lck1->start - lck2->start);
        }
        if (lck2->size != lck1->size) {
                return ((int)lck1->size - (int)lck2->size);
        }
        return 0;
}
#endif

/****************************************************************************
 Lock a range of bytes - Windows lock semantics.
****************************************************************************/

NTSTATUS brl_lock_windows_default(struct byte_range_lock *br_lck,
    struct lock_struct *plock, bool blocking_lock)
{
        unsigned int i;
        files_struct *fsp = br_lck->fsp;
        struct lock_struct *locks = br_lck->lock_data;
        NTSTATUS status;

        SMB_ASSERT(plock->lock_type != UNLOCK_LOCK);

        if ((plock->start + plock->size - 1 < plock->start) &&
                        plock->size != 0) {
                return NT_STATUS_INVALID_LOCK_RANGE;
        }

        for (i=0; i < br_lck->num_locks; i++) {
                /* Do any Windows or POSIX locks conflict ? */
                if (brl_conflict(&locks[i], plock)) {
                        if (!serverid_exists(&locks[i].context.pid)) {
                                locks[i].context.pid.pid = 0;
                                br_lck->modified = true;
                                continue;
                        }
                        /* Remember who blocked us. */
                        plock->context.smblctx = locks[i].context.smblctx;
                        return brl_lock_failed(fsp,plock,blocking_lock);
                }
#if ZERO_ZERO
                if (plock->start == 0 && plock->size == 0 &&
                                locks[i].size == 0) {
                        break;
                }
#endif
        }

        if (!IS_PENDING_LOCK(plock->lock_type)) {
                contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
        }

        /* We can get the Windows lock, now see if it needs to
           be mapped into a lower level POSIX one, and if so can
           we get it ? */

        if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) {
                int errno_ret;
                if (!set_posix_lock_windows_flavour(fsp,
                                plock->start,
                                plock->size,
                                plock->lock_type,
                                &plock->context,
                                locks,
                                br_lck->num_locks,
                                &errno_ret)) {

                        /* We don't know who blocked us. */
                        plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;

                        if (errno_ret == EACCES || errno_ret == EAGAIN) {
                                status = NT_STATUS_FILE_LOCK_CONFLICT;
                                goto fail;
                        } else {
                                status = map_nt_error_from_unix(errno);
                                goto fail;
                        }
                }
        }

        /* no conflicts - add it to the list of locks */
        locks = talloc_realloc(br_lck, locks, struct lock_struct,
                               (br_lck->num_locks + 1));
        if (!locks) {
                status = NT_STATUS_NO_MEMORY;
                goto fail;
        }

        memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
        br_lck->num_locks += 1;
        br_lck->lock_data = locks;
        br_lck->modified = True;

        return NT_STATUS_OK;
 fail:
        if (!IS_PENDING_LOCK(plock->lock_type)) {
                contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
        }
        return status;
}
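
/*
 * Note on the serverid_exists() checks in the conflict loops here and
 * below: a conflicting entry whose owning process is gone is stale, so
 * rather than let a dead process block the lock, its pid is zeroed and
 * br_lck->modified is set; the zeroed entry is then dropped when the
 * record is written back (handled outside this excerpt).
 */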

/****************************************************************************
 Cope with POSIX range splits and merges.
****************************************************************************/

static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr,       /* Output array. */
                                                struct lock_struct *ex,         /* existing lock. */
                                                struct lock_struct *plock)      /* proposed lock. */
{
        bool lock_types_differ = (ex->lock_type != plock->lock_type);

        /* We can't merge non-conflicting locks on different context - ignore fnum. */

        if (!brl_same_context(&ex->context, &plock->context)) {
                /* Just copy. */
                memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                return 1;
        }

        /* We now know we have the same context. */

        /* Did we overlap ? */

/*********************************************
                                        +---------+
                                        | ex      |
                                        +---------+
                         +-------+
                         | plock |
                         +-------+
OR....
        +---------+
        |  ex     |
        +---------+
**********************************************/

        if ( (ex->start > (plock->start + plock->size)) ||
                (plock->start > (ex->start + ex->size))) {

                /* No overlap with this lock - copy existing. */

                memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                return 1;
        }

/*********************************************
        +---------------------------+
        |          ex               |
        +---------------------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+
OR
             +---------------+
             |       ex      |
             +---------------+
        +---------------------------+
        |       plock               | -> replace with plock.
        +---------------------------+

**********************************************/

        if ( (ex->start >= plock->start) &&
                (ex->start + ex->size <= plock->start + plock->size) ) {

                /* Replace - discard existing lock. */

                return 0;
        }

/*********************************************
Adjacent after.
                        +-------+
                        |  ex   |
                        +-------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

        if (plock->start + plock->size == ex->start) {

                /* If the lock types are the same, we merge, if different, we
                   add the remainder of the old lock. */

                if (lock_types_differ) {
                        /* Add existing. */
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        return 1;
                } else {
                        /* Merge - adjust incoming lock as we may have more
                         * merging to come. */
                        plock->size += ex->size;
                        return 0;
                }
        }

/*********************************************
Adjacent before.
        +-------+
        |  ex   |
        +-------+
                +---------------+
                |   plock       |
                +---------------+
BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

        if (ex->start + ex->size == plock->start) {

                /* If the lock types are the same, we merge, if different, we
                   add the existing lock. */

                if (lock_types_differ) {
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        return 1;
                } else {
                        /* Merge - adjust incoming lock as we may have more
                         * merging to come. */
                        plock->start = ex->start;
                        plock->size += ex->size;
                        return 0;
                }
        }

/*********************************************
Overlap after.
        +-----------------------+
        |          ex           |
        +-----------------------+
        +---------------+
        |   plock       |
        +---------------+
OR
               +----------------+
               |       ex       |
               +----------------+
        +---------------+
        |   plock       |
        +---------------+

BECOMES....
        +---------------+-------+
        |   plock       | ex    | - different lock types.
        +---------------+-------+
OR.... (merge)
        +-----------------------+
        |   plock               | - same lock type.
        +-----------------------+
**********************************************/

        if ( (ex->start >= plock->start) &&
                (ex->start <= plock->start + plock->size) &&
                (ex->start + ex->size > plock->start + plock->size) ) {

                /* If the lock types are the same, we merge, if different, we
                   add the remainder of the old lock. */

                if (lock_types_differ) {
                        /* Add remaining existing. */
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        /* Adjust existing start and size. */
                        lck_arr[0].start = plock->start + plock->size;
                        lck_arr[0].size = (ex->start + ex->size) - (plock->start + plock->size);
                        return 1;
                } else {
                        /* Merge - adjust incoming lock as we may have more
                         * merging to come. */
                        plock->size += (ex->start + ex->size) - (plock->start + plock->size);
                        return 0;
                }
        }

/*********************************************
Overlap before.
        +-----------------------+
        |  ex                   |
        +-----------------------+
                +---------------+
                |   plock       |
                +---------------+
OR
        +-------------+
        |  ex         |
        +-------------+
                +---------------+
                |   plock       |
                +---------------+

BECOMES....
        +-------+---------------+
        | ex    |   plock       | - different lock types
        +-------+---------------+

OR.... (merge)
        +-----------------------+
        |      plock            | - same lock type.
        +-----------------------+

**********************************************/

        if ( (ex->start < plock->start) &&
                        (ex->start + ex->size >= plock->start) &&
                        (ex->start + ex->size <= plock->start + plock->size) ) {

                /* If the lock types are the same, we merge, if different, we
                   add the truncated old lock. */

                if (lock_types_differ) {
                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        /* Adjust existing size. */
                        lck_arr[0].size = plock->start - ex->start;
                        return 1;
                } else {
                        /* Merge - adjust incoming lock as we may have more
                         * merging to come. MUST ADJUST plock SIZE FIRST ! */
                        plock->size += (plock->start - ex->start);
                        plock->start = ex->start;
                        return 0;
                }
        }

/*********************************************
Complete overlap.
        +---------------------------+
        |        ex                 |
        +---------------------------+
                +---------+
                |  plock  |
                +---------+
BECOMES.....
        +-------+---------+---------+
        | ex    |  plock  | ex      | - different lock types.
        +-------+---------+---------+
OR
        +---------------------------+
        |        plock              | - same lock type.
        +---------------------------+
**********************************************/

        if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) {

                if (lock_types_differ) {

                        /* We have to split ex into two locks here. */

                        memcpy(&lck_arr[0], ex, sizeof(struct lock_struct));
                        memcpy(&lck_arr[1], ex, sizeof(struct lock_struct));

                        /* Adjust first existing size. */
                        lck_arr[0].size = plock->start - ex->start;

                        /* Adjust second existing start and size. */
                        lck_arr[1].start = plock->start + plock->size;
                        lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size);
                        return 2;
                } else {
                        /* Just eat the existing locks, merge them into plock. */
                        plock->start = ex->start;
                        plock->size = ex->size;
                        return 0;
                }
        }

        /* Never get here. */
        smb_panic("brlock_posix_split_merge");
        /* Notreached. */

        /* Keep some compilers happy. */
        return 0;
}
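
/*
 * Summary of the contract above: brlock_posix_split_merge() writes the
 * surviving fragment(s) of the existing lock into lck_arr and returns
 * how many it wrote (0, 1 or 2); plock itself may be grown in place
 * when ranges merge. The two-fragment split is the worst case, which
 * is why the callers below size their scratch arrays with headroom of
 * two extra entries on the lock path and one on the unlock path.
 */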

/****************************************************************************
 Lock a range of bytes - POSIX lock semantics.
 We must cope with range splits and merges.
****************************************************************************/

static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
                               struct byte_range_lock *br_lck,
                               struct lock_struct *plock)
{
        unsigned int i, count, posix_count;
        struct lock_struct *locks = br_lck->lock_data;
        struct lock_struct *tp;
        bool signal_pending_read = False;
        bool break_oplocks = false;
        NTSTATUS status;

        /* No zero-zero locks for POSIX. */
        if (plock->start == 0 && plock->size == 0) {
                return NT_STATUS_INVALID_PARAMETER;
        }

        /* Don't allow 64-bit lock wrap. */
        if (plock->start + plock->size - 1 < plock->start) {
                return NT_STATUS_INVALID_PARAMETER;
        }

        /* The worst case scenario here is we have to split an
           existing POSIX lock range into two, and add our lock,
           so we need at most 2 more entries. */

        tp = talloc_array(br_lck, struct lock_struct, br_lck->num_locks + 2);
        if (!tp) {
                return NT_STATUS_NO_MEMORY;
        }

        count = posix_count = 0;

        for (i=0; i < br_lck->num_locks; i++) {
                struct lock_struct *curr_lock = &locks[i];

                /* If we have a pending read lock, a lock downgrade should
                   trigger a lock re-evaluation. */
                if (curr_lock->lock_type == PENDING_READ_LOCK &&
                                brl_pending_overlap(plock, curr_lock)) {
                        signal_pending_read = True;
                }

                if (curr_lock->lock_flav == WINDOWS_LOCK) {
                        /* Do any Windows flavour locks conflict ? */
                        if (brl_conflict(curr_lock, plock)) {
                                if (!serverid_exists(&curr_lock->context.pid)) {
                                        curr_lock->context.pid.pid = 0;
                                        br_lck->modified = true;
                                        continue;
                                }
                                /* No games with error messages. */
                                TALLOC_FREE(tp);
                                /* Remember who blocked us. */
                                plock->context.smblctx = curr_lock->context.smblctx;
                                return NT_STATUS_FILE_LOCK_CONFLICT;
                        }
                        /* Just copy the Windows lock into the new array. */
                        memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
                        count++;
                } else {
                        unsigned int tmp_count = 0;

                        /* POSIX conflict semantics are different. */
                        if (brl_conflict_posix(curr_lock, plock)) {
                                if (!serverid_exists(&curr_lock->context.pid)) {
                                        curr_lock->context.pid.pid = 0;
                                        br_lck->modified = true;
                                        continue;
                                }
                                /* Can't block ourselves with POSIX locks. */
                                /* No games with error messages. */
                                TALLOC_FREE(tp);
                                /* Remember who blocked us. */
                                plock->context.smblctx = curr_lock->context.smblctx;
                                return NT_STATUS_FILE_LOCK_CONFLICT;
                        }

                        /* Work out overlaps. */
                        tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock);
                        posix_count += tmp_count;
                        count += tmp_count;
                }
        }

        /*
         * Break oplocks while we hold a brl. Since lock() and unlock() calls
         * are not symmetric with POSIX semantics, we cannot guarantee our
         * contend_level2_oplocks_begin/end calls will be acquired and
         * released one-for-one as with Windows semantics. Therefore we only
         * call contend_level2_oplocks_begin if this is the first POSIX brl on
         * the file.
         */
        break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
                         posix_count == 0);
        if (break_oplocks) {
                contend_level2_oplocks_begin(br_lck->fsp,
                                             LEVEL2_CONTEND_POSIX_BRL);
        }

        /* Try and add the lock in order, sorted by lock start. */
        for (i=0; i < count; i++) {
                struct lock_struct *curr_lock = &tp[i];

                if (curr_lock->start <= plock->start) {
                        continue;
                }
                break;
        }

        if (i < count) {
                memmove(&tp[i+1], &tp[i],
                        (count - i)*sizeof(struct lock_struct));
        }
        memcpy(&tp[i], plock, sizeof(struct lock_struct));
        count++;

        /* We can get the POSIX lock, now see if it needs to
           be mapped into a lower level POSIX one, and if so can
           we get it ? */

        if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) {
                int errno_ret;

                /* The lower layer just needs to attempt to
                   get the system POSIX lock. We've weeded out
                   any conflicts above. */

                if (!set_posix_lock_posix_flavour(br_lck->fsp,
                                plock->start,
                                plock->size,
                                plock->lock_type,
                                &errno_ret)) {

                        /* We don't know who blocked us. */
                        plock->context.smblctx = 0xFFFFFFFFFFFFFFFFLL;

                        if (errno_ret == EACCES || errno_ret == EAGAIN) {
                                TALLOC_FREE(tp);
                                status = NT_STATUS_FILE_LOCK_CONFLICT;
                                goto fail;
                        } else {
                                TALLOC_FREE(tp);
                                status = map_nt_error_from_unix(errno);
                                goto fail;
                        }
                }
        }

        /* If we didn't use all the allocated size,
         * realloc so we don't leak entries per lock call. */
        if (count < br_lck->num_locks + 2) {
                tp = talloc_realloc(br_lck, tp, struct lock_struct, count);
                if (!tp) {
                        status = NT_STATUS_NO_MEMORY;
                        goto fail;
                }
        }

        br_lck->num_locks = count;
        TALLOC_FREE(br_lck->lock_data);
        br_lck->lock_data = tp;
        locks = tp;
        br_lck->modified = True;

        /* A successful downgrade from write to read lock can trigger a lock
           re-evaluation where waiting readers can now proceed. */

        if (signal_pending_read) {
                /* Send unlock messages to any pending read waiters that overlap. */
                for (i=0; i < br_lck->num_locks; i++) {
                        struct lock_struct *pend_lock = &locks[i];

                        /* Ignore non-pending locks. */
                        if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
                                continue;
                        }

                        if (pend_lock->lock_type == PENDING_READ_LOCK &&
                                        brl_pending_overlap(plock, pend_lock)) {
                                DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n",
                                        procid_str_static(&pend_lock->context.pid )));

                                messaging_send(msg_ctx, pend_lock->context.pid,
                                               MSG_SMB_UNLOCK, &data_blob_null);
                        }
                }
        }

        return NT_STATUS_OK;
 fail:
        if (break_oplocks) {
                contend_level2_oplocks_end(br_lck->fsp,
                                           LEVEL2_CONTEND_POSIX_BRL);
        }
        return status;
}

NTSTATUS smb_vfs_call_brl_lock_windows(struct vfs_handle_struct *handle,
                                       struct byte_range_lock *br_lck,
                                       struct lock_struct *plock,
                                       bool blocking_lock)
{
        VFS_FIND(brl_lock_windows);
        return handle->fns->brl_lock_windows_fn(handle, br_lck, plock,
                                                blocking_lock);
}

/****************************************************************************
 Lock a range of bytes.
****************************************************************************/

NTSTATUS brl_lock(struct messaging_context *msg_ctx,
                struct byte_range_lock *br_lck,
                uint64_t smblctx,
                struct server_id pid,
                br_off start,
                br_off size,
                enum brl_type lock_type,
                enum brl_flavour lock_flav,
                bool blocking_lock,
                uint64_t *psmblctx)
{
        NTSTATUS ret;
        struct lock_struct lock;

#if !ZERO_ZERO
        if (start == 0 && size == 0) {
                DEBUG(0,("client sent 0/0 lock - please report this\n"));
        }
#endif

        lock = (struct lock_struct) {
                .context.smblctx = smblctx,
                .context.pid = pid,
                .context.tid = br_lck->fsp->conn->cnum,
                .start = start,
                .size = size,
                .fnum = br_lck->fsp->fnum,
                .lock_type = lock_type,
                .lock_flav = lock_flav
        };

        if (lock_flav == WINDOWS_LOCK) {
                ret = SMB_VFS_BRL_LOCK_WINDOWS(br_lck->fsp->conn, br_lck,
                                               &lock, blocking_lock);
        } else {
                ret = brl_lock_posix(msg_ctx, br_lck, &lock);
        }

#if ZERO_ZERO
        /* sort the lock list */
        TYPESAFE_QSORT(br_lck->lock_data, (size_t)br_lck->num_locks, lock_compare);
#endif

        /* If we're returning an error, return who blocked us. */
        if (!NT_STATUS_IS_OK(ret) && psmblctx) {
                *psmblctx = lock.context.smblctx;
        }
        return ret;
}
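
/*
 * Minimal caller sketch (illustrative only; the real callers live in
 * the higher-level locking code):
 *
 *      uint64_t blocker = 0;
 *      NTSTATUS status = brl_lock(msg_ctx, br_lck, smblctx, self_pid,
 *                                 start, size, WRITE_LOCK, WINDOWS_LOCK,
 *                                 false, &blocker);
 *
 * On failure, "blocker" carries the smblctx of the conflicting lock,
 * or all-ones if the conflict came from a lower-level POSIX lock.
 */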

static void brl_delete_lock_struct(struct lock_struct *locks,
                                   unsigned num_locks,
                                   unsigned del_idx)
{
        if (del_idx >= num_locks) {
                return;
        }
        memmove(&locks[del_idx], &locks[del_idx+1],
                sizeof(*locks) * (num_locks - del_idx - 1));
}
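
/*
 * For illustration: with num_locks == 4 and del_idx == 1, entries 2 and
 * 3 are shifted down over entry 1; the caller must then decrement its
 * own lock count, as brl_unlock_windows_default() does below.
 */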

/****************************************************************************
 Unlock a range of bytes - Windows semantics.
****************************************************************************/

bool brl_unlock_windows_default(struct messaging_context *msg_ctx,
                               struct byte_range_lock *br_lck,
                               const struct lock_struct *plock)
{
        unsigned int i, j;
        struct lock_struct *locks = br_lck->lock_data;
        enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */

        SMB_ASSERT(plock->lock_type == UNLOCK_LOCK);

#if ZERO_ZERO
        /* Delete write locks by preference... The lock list
           is sorted in the zero zero case. */

        for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];

                if (lock->lock_type == WRITE_LOCK &&
                    brl_same_context(&lock->context, &plock->context) &&
                    lock->fnum == plock->fnum &&
                    lock->lock_flav == WINDOWS_LOCK &&
                    lock->start == plock->start &&
                    lock->size == plock->size) {

                        /* found it - delete it */
                        deleted_lock_type = lock->lock_type;
                        break;
                }
        }

        if (i != br_lck->num_locks) {
                /* We found it - don't search again. */
                goto unlock_continue;
        }
#endif

        for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];

                if (IS_PENDING_LOCK(lock->lock_type)) {
                        continue;
                }

                /* Only remove our own locks that match in start, size, and flavour. */
                if (brl_same_context(&lock->context, &plock->context) &&
                                        lock->fnum == plock->fnum &&
                                        lock->lock_flav == WINDOWS_LOCK &&
                                        lock->start == plock->start &&
                                        lock->size == plock->size ) {
                        deleted_lock_type = lock->lock_type;
                        break;
                }
        }

        if (i == br_lck->num_locks) {
                /* we didn't find it */
                return False;
        }

#if ZERO_ZERO
  unlock_continue:
#endif

        brl_delete_lock_struct(locks, br_lck->num_locks, i);
        br_lck->num_locks -= 1;
        br_lck->modified = True;

        /* Unlock the underlying POSIX regions. */
        if(lp_posix_locking(br_lck->fsp->conn->params)) {
                release_posix_lock_windows_flavour(br_lck->fsp,
                                plock->start,
                                plock->size,
                                deleted_lock_type,
                                &plock->context,
                                locks,
                                br_lck->num_locks);
        }

        /* Send unlock messages to any pending waiters that overlap. */
        for (j=0; j < br_lck->num_locks; j++) {
                struct lock_struct *pend_lock = &locks[j];

                /* Ignore non-pending locks. */
                if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
                        continue;
                }

                /* We could send specific lock info here... */
                if (brl_pending_overlap(plock, pend_lock)) {
                        DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
                                procid_str_static(&pend_lock->context.pid )));

                        messaging_send(msg_ctx, pend_lock->context.pid,
                                       MSG_SMB_UNLOCK, &data_blob_null);
                }
        }

        contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
        return True;
}

/****************************************************************************
 Unlock a range of bytes - POSIX semantics.
****************************************************************************/

static bool brl_unlock_posix(struct messaging_context *msg_ctx,
                             struct byte_range_lock *br_lck,
                             struct lock_struct *plock)
{
        unsigned int i, j, count;
        struct lock_struct *tp;
        struct lock_struct *locks = br_lck->lock_data;
        bool overlap_found = False;

        /* No zero-zero locks for POSIX. */
        if (plock->start == 0 && plock->size == 0) {
                return False;
        }

        /* Don't allow 64-bit lock wrap. */
        if (plock->start + plock->size < plock->start ||
                        plock->start + plock->size < plock->size) {
                DEBUG(10,("brl_unlock_posix: lock wrap\n"));
                return False;
        }

        /* The worst case scenario here is we have to split an
           existing POSIX lock range into two, so we need at most
           1 more entry. */

        tp = talloc_array(br_lck, struct lock_struct, br_lck->num_locks + 1);
        if (!tp) {
                DEBUG(10,("brl_unlock_posix: malloc fail\n"));
                return False;
        }

        count = 0;
        for (i = 0; i < br_lck->num_locks; i++) {
                struct lock_struct *lock = &locks[i];
                unsigned int tmp_count;

                /* Only remove our own locks - ignore fnum. */
                if (IS_PENDING_LOCK(lock->lock_type) ||
                                !brl_same_context(&lock->context, &plock->context)) {
                        memcpy(&tp[count], lock, sizeof(struct lock_struct));
                        count++;
                        continue;
                }

                if (lock->lock_flav == WINDOWS_LOCK) {
                        /* Do any Windows flavour locks conflict ? */
                        if (brl_conflict(lock, plock)) {
                                TALLOC_FREE(tp);
                                return false;
                        }
                        /* Just copy the Windows lock into the new array. */
                        memcpy(&tp[count], lock, sizeof(struct lock_struct));
                        count++;
                        continue;
                }

                /* Work out overlaps. */
                tmp_count = brlock_posix_split_merge(&tp[count], lock, plock);

                if (tmp_count == 0) {
                        /* plock overlapped the existing lock completely,
                           or replaced it. Don't copy the existing lock. */
                        overlap_found = true;
                } else if (tmp_count == 1) {
                        /* Either no overlap, (simple copy of existing lock) or
                         * an overlap of an existing lock. */
                        /* If the lock changed size, we had an overlap. */
                        if (tp[count].size != lock->size) {
                                overlap_found = true;
                        }
                        count += tmp_count;
                } else if (tmp_count == 2) {
                        /* We split a lock range in two. */
                        overlap_found = true;
                        count += tmp_count;

                        /* Optimisation... */
                        /* We know we're finished here as we can't overlap any
                           more POSIX locks. Copy the rest of the lock array. */

                        if (i < br_lck->num_locks - 1) {
                                memcpy(&tp[count], &locks[i+1],
                                        sizeof(*locks)*((br_lck->num_locks-1) - i));
                                count += ((br_lck->num_locks-1) - i);
                        }
                        break;
                }
        }

        if (!overlap_found) {
                /* Just ignore - no change. */
                TALLOC_FREE(tp);
                DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n"));
                return True;
        }

        /* Unlock any POSIX regions. */
        if(lp_posix_locking(br_lck->fsp->conn->params)) {
                release_posix_lock_posix_flavour(br_lck->fsp,
                                                plock->start,
                                                plock->size,
                                                &plock->context,
                                                tp,
                                                count);
        }

        /* Realloc so we don't leak entries per unlock call. */
        if (count) {
                tp = talloc_realloc(br_lck, tp, struct lock_struct, count);
                if (!tp) {
                        DEBUG(10,("brl_unlock_posix: realloc fail\n"));
                        return False;
                }
        } else {
                /* We deleted the last lock. */
                TALLOC_FREE(tp);
                tp = NULL;
        }

        contend_level2_oplocks_end(br_lck->fsp,
                                   LEVEL2_CONTEND_POSIX_BRL);

        br_lck->num_locks = count;
        TALLOC_FREE(br_lck->lock_data);
        locks = tp;
        br_lck->lock_data = tp;
        br_lck->modified = True;

        /* Send unlock messages to any pending waiters that overlap. */

        for (j=0; j < br_lck->num_locks; j++) {
                struct lock_struct *pend_lock = &locks[j];

                /* Ignore non-pending locks. */
                if (!IS_PENDING_LOCK(pend_lock->lock_type)) {
                        continue;
                }

                /* We could send specific lock info here... */
                if (brl_pending_overlap(plock, pend_lock)) {
                        DEBUG(10,("brl_unlock: sending unlock message to pid %s\n",
                                procid_str_static(&pend_lock->context.pid )));

                        messaging_send(msg_ctx, pend_lock->context.pid,
                                       MSG_SMB_UNLOCK, &data_blob_null);
                }
        }

        return True;
}
1316
1317 bool smb_vfs_call_brl_unlock_windows(struct vfs_handle_struct *handle,
1318                                      struct messaging_context *msg_ctx,
1319                                      struct byte_range_lock *br_lck,
1320                                      const struct lock_struct *plock)
1321 {
1322         VFS_FIND(brl_unlock_windows);
1323         return handle->fns->brl_unlock_windows_fn(handle, msg_ctx, br_lck,
1324                                                   plock);
1325 }
1326
1327 /****************************************************************************
1328  Unlock a range of bytes.
1329 ****************************************************************************/
1330
1331 bool brl_unlock(struct messaging_context *msg_ctx,
1332                 struct byte_range_lock *br_lck,
1333                 uint64_t smblctx,
1334                 struct server_id pid,
1335                 br_off start,
1336                 br_off size,
1337                 enum brl_flavour lock_flav)
1338 {
1339         struct lock_struct lock;
1340
1341         lock.context.smblctx = smblctx;
1342         lock.context.pid = pid;
1343         lock.context.tid = br_lck->fsp->conn->cnum;
1344         lock.start = start;
1345         lock.size = size;
1346         lock.fnum = br_lck->fsp->fnum;
1347         lock.lock_type = UNLOCK_LOCK;
1348         lock.lock_flav = lock_flav;
1349
1350         if (lock_flav == WINDOWS_LOCK) {
1351                 return SMB_VFS_BRL_UNLOCK_WINDOWS(br_lck->fsp->conn, msg_ctx,
1352                     br_lck, &lock);
1353         } else {
1354                 return brl_unlock_posix(msg_ctx, br_lck, &lock);
1355         }
1356 }
1357
1358 /****************************************************************************
1359  Test if we could add a lock if we wanted to.
1360  Returns True if the region required is currently unlocked, False if locked.
1361 ****************************************************************************/
1362
1363 bool brl_locktest(struct byte_range_lock *br_lck,
1364                   const struct lock_struct *rw_probe)
1365 {
1366         bool ret = True;
1367         unsigned int i;
1368         struct lock_struct *locks = br_lck->lock_data;
1369         files_struct *fsp = br_lck->fsp;
1370
1371         /* Make sure existing locks don't conflict */
1372         for (i=0; i < br_lck->num_locks; i++) {
1373                 /*
1374                  * Our own locks don't conflict.
1375                  */
1376                 if (brl_conflict_other(&locks[i], rw_probe)) {
1377                         if (br_lck->record == NULL) {
1378                                 /* readonly */
1379                                 return false;
1380                         }
1381
1382                         if (!serverid_exists(&locks[i].context.pid)) {
1383                                 locks[i].context.pid.pid = 0;
1384                                 br_lck->modified = true;
1385                                 continue;
1386                         }
1387
1388                         return false;
1389                 }
1390         }
1391
1392         /*
1393          * No conflicting lock is held by an SMB daemon. Check whether
1394          * there is a POSIX lock from a UNIX or NFS process; such a lock
1395          * conflicts only with Windows locks, not with POSIX locks.
1396          */
1397
1398         if (lp_posix_locking(fsp->conn->params) &&
1399            (rw_probe->lock_flav == WINDOWS_LOCK)) {
1400                 /*
1401                  * Make copies -- is_posix_locked might modify the values
1402                  */
1403
1404                 br_off start = rw_probe->start;
1405                 br_off size = rw_probe->size;
1406                 enum brl_type lock_type = rw_probe->lock_type;
1407
1408                 ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK);
1409
1410                 DEBUG(10, ("brl_locktest: posix start=%ju len=%ju %s for %s "
1411                            "file %s\n", (uintmax_t)start, (uintmax_t)size,
1412                            ret ? "locked" : "unlocked",
1413                            fsp_fnum_dbg(fsp), fsp_str_dbg(fsp)));
1414
1415                 /* We need to return the inverse of is_posix_locked. */
1416                 ret = !ret;
1417         }
1418
1419         /* no conflicts - we could have added it */
1420         return ret;
1421 }
1422
1423 /****************************************************************************
1424  Query for existing locks.
1425 ****************************************************************************/
1426
1427 NTSTATUS brl_lockquery(struct byte_range_lock *br_lck,
1428                 uint64_t *psmblctx,
1429                 struct server_id pid,
1430                 br_off *pstart,
1431                 br_off *psize,
1432                 enum brl_type *plock_type,
1433                 enum brl_flavour lock_flav)
1434 {
1435         unsigned int i;
1436         struct lock_struct lock;
1437         const struct lock_struct *locks = br_lck->lock_data;
1438         files_struct *fsp = br_lck->fsp;
1439
1440         lock.context.smblctx = *psmblctx;
1441         lock.context.pid = pid;
1442         lock.context.tid = br_lck->fsp->conn->cnum;
1443         lock.start = *pstart;
1444         lock.size = *psize;
1445         lock.fnum = fsp->fnum;
1446         lock.lock_type = *plock_type;
1447         lock.lock_flav = lock_flav;
1448
1449         /* Make sure existing locks don't conflict */
1450         for (i=0; i < br_lck->num_locks; i++) {
1451                 const struct lock_struct *exlock = &locks[i];
1452                 bool conflict = False;
1453
1454                 if (exlock->lock_flav == WINDOWS_LOCK) {
1455                         conflict = brl_conflict(exlock, &lock);
1456                 } else {
1457                         conflict = brl_conflict_posix(exlock, &lock);
1458                 }
1459
1460                 if (conflict) {
1461                         *psmblctx = exlock->context.smblctx;
1462                         *pstart = exlock->start;
1463                         *psize = exlock->size;
1464                         *plock_type = exlock->lock_type;
1465                         return NT_STATUS_LOCK_NOT_GRANTED;
1466                 }
1467         }
1468
1469         /*
1470          * No conflicting lock is held by an SMB daemon. Check whether
1471          * there is a POSIX lock from a UNIX or NFS process.
1472          */
1473
1474         if (lp_posix_locking(fsp->conn->params)) {
1475                 bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK);
1476
1477                 DEBUG(10, ("brl_lockquery: posix start=%ju len=%ju %s for %s "
1478                            "file %s\n", (uintmax_t)*pstart,
1479                            (uintmax_t)*psize, ret ? "locked" : "unlocked",
1480                            fsp_fnum_dbg(fsp), fsp_str_dbg(fsp)));
1481
1482                 if (ret) {
1483                         /* No smblctx for a lock held outside smbd - use (uint64_t)-1. */
1484                         *psmblctx = 0xFFFFFFFFFFFFFFFFLL;
1485                         return NT_STATUS_LOCK_NOT_GRANTED;
1486                 }
1487         }
1488
1489         return NT_STATUS_OK;
1490 }
1491
1492
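     /****************************************************************************
      VFS entry point: dispatch a Windows-flavour lock cancel through the VFS
      layer.
     ****************************************************************************/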
1493 bool smb_vfs_call_brl_cancel_windows(struct vfs_handle_struct *handle,
1494                                      struct byte_range_lock *br_lck,
1495                                      struct lock_struct *plock)
1496 {
1497         VFS_FIND(brl_cancel_windows);
1498         return handle->fns->brl_cancel_windows_fn(handle, br_lck, plock);
1499 }
1500
1501 /****************************************************************************
1502  Remove a particular pending lock.
1503 ****************************************************************************/
1504 bool brl_lock_cancel(struct byte_range_lock *br_lck,
1505                 uint64_t smblctx,
1506                 struct server_id pid,
1507                 br_off start,
1508                 br_off size,
1509                 enum brl_flavour lock_flav)
1510 {
1511         bool ret;
1512         struct lock_struct lock;
1513
1514         lock.context.smblctx = smblctx;
1515         lock.context.pid = pid;
1516         lock.context.tid = br_lck->fsp->conn->cnum;
1517         lock.start = start;
1518         lock.size = size;
1519         lock.fnum = br_lck->fsp->fnum;
1520         lock.lock_flav = lock_flav;
1521         /* lock.lock_type doesn't matter */
1522
1523         if (lock_flav == WINDOWS_LOCK) {
1524                 ret = SMB_VFS_BRL_CANCEL_WINDOWS(br_lck->fsp->conn, br_lck,
1525                                                  &lock);
1526         } else {
1527                 ret = brl_lock_cancel_default(br_lck, &lock);
1528         }
1529
1530         return ret;
1531 }
1532
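     /****************************************************************************
      Default cancel implementation: find the matching pending lock entry and
      remove it from the lock array.
     ****************************************************************************/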
1533 bool brl_lock_cancel_default(struct byte_range_lock *br_lck,
1534                 struct lock_struct *plock)
1535 {
1536         unsigned int i;
1537         struct lock_struct *locks = br_lck->lock_data;
1538
1539         SMB_ASSERT(plock);
1540
1541         for (i = 0; i < br_lck->num_locks; i++) {
1542                 struct lock_struct *lock = &locks[i];
1543
1544                 /* For pending locks we *always* care about the fnum. */
1545                 if (brl_same_context(&lock->context, &plock->context) &&
1546                                 lock->fnum == plock->fnum &&
1547                                 IS_PENDING_LOCK(lock->lock_type) &&
1548                                 lock->lock_flav == plock->lock_flav &&
1549                                 lock->start == plock->start &&
1550                                 lock->size == plock->size) {
1551                         break;
1552                 }
1553         }
1554
1555         if (i == br_lck->num_locks) {
1556                 /* Didn't find it. */
1557                 return False;
1558         }
1559
1560         brl_delete_lock_struct(locks, br_lck->num_locks, i);
1561         br_lck->num_locks -= 1;
1562         br_lck->modified = True;
1563         return True;
1564 }
1565
1566 /****************************************************************************
1567  Remove any locks associated with an open file.
1568 ****************************************************************************/
1571
1572 void brl_close_fnum(struct messaging_context *msg_ctx,
1573                     struct byte_range_lock *br_lck)
1574 {
1575         files_struct *fsp = br_lck->fsp;
1576         uint32_t tid = fsp->conn->cnum;
1577         uint64_t fnum = fsp->fnum;
1578         unsigned int i;
1579         struct lock_struct *locks = br_lck->lock_data;
1580         struct server_id pid = messaging_server_id(fsp->conn->sconn->msg_ctx);
1581         struct lock_struct *locks_copy;
1582         unsigned int num_locks_copy;
1583
1584         /* Copy the current lock array. */
1585         if (br_lck->num_locks) {
1586                 locks_copy = (struct lock_struct *)talloc_memdup(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct));
1587                 if (!locks_copy) {
1588                         smb_panic("brl_close_fnum: talloc failed");
1589                 }
1590         } else {
1591                 locks_copy = NULL;
1592         }
1593
1594         num_locks_copy = br_lck->num_locks;
1595
1596         for (i=0; i < num_locks_copy; i++) {
1597                 struct lock_struct *lock = &locks_copy[i];
1598
1599                 if (lock->context.tid == tid && serverid_equal(&lock->context.pid, &pid) &&
1600                                 (lock->fnum == fnum)) {
1601                         brl_unlock(msg_ctx,
1602                                 br_lck,
1603                                 lock->context.smblctx,
1604                                 pid,
1605                                 lock->start,
1606                                 lock->size,
1607                                 lock->lock_flav);
1608                 }
1609         }
1610 }
1611
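     /****************************************************************************
      Mark all brlock entries of a disconnected durable handle as disconnected,
      so that they survive until the handle is reconnected. Returns false if
      any entry does not belong to this durable handle.
     ****************************************************************************/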
1612 bool brl_mark_disconnected(struct files_struct *fsp)
1613 {
1614         uint32_t tid = fsp->conn->cnum;
1615         uint64_t smblctx;
1616         uint64_t fnum = fsp->fnum;
1617         unsigned int i;
1618         struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx);
1619         struct byte_range_lock *br_lck = NULL;
1620
1621         if (fsp->op == NULL) {
1622                 return false;
1623         }
1624
1625         smblctx = fsp->op->global->open_persistent_id;
1626
1627         if (!fsp->op->global->durable) {
1628                 return false;
1629         }
1630
1631         if (fsp->current_lock_count == 0) {
1632                 return true;
1633         }
1634
1635         br_lck = brl_get_locks(talloc_tos(), fsp);
1636         if (br_lck == NULL) {
1637                 return false;
1638         }
1639
1640         for (i=0; i < br_lck->num_locks; i++) {
1641                 struct lock_struct *lock = &br_lck->lock_data[i];
1642
1643                 /*
1644                  * As this is a durable handle, we expect only locks
1645                  * belonging to the current file handle.
1646                  */
1647
1648                 if (lock->context.smblctx != smblctx) {
1649                         TALLOC_FREE(br_lck);
1650                         return false;
1651                 }
1652
1653                 if (lock->context.tid != tid) {
1654                         TALLOC_FREE(br_lck);
1655                         return false;
1656                 }
1657
1658                 if (!serverid_equal(&lock->context.pid, &self)) {
1659                         TALLOC_FREE(br_lck);
1660                         return false;
1661                 }
1662
1663                 if (lock->fnum != fnum) {
1664                         TALLOC_FREE(br_lck);
1665                         return false;
1666                 }
1667
1668                 server_id_set_disconnected(&lock->context.pid);
1669                 lock->context.tid = TID_FIELD_INVALID;
1670                 lock->fnum = FNUM_FIELD_INVALID;
1671         }
1672
1673         br_lck->modified = true;
1674         TALLOC_FREE(br_lck);
1675         return true;
1676 }
1677
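     /****************************************************************************
      Reactivate the disconnected brlock entries of a durable handle on
      reconnect, rewriting owner, tid and fnum to the reconnected values.
     ****************************************************************************/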
1678 bool brl_reconnect_disconnected(struct files_struct *fsp)
1679 {
1680         uint32_t tid = fsp->conn->cnum;
1681         uint64_t smblctx;
1682         uint64_t fnum = fsp->fnum;
1683         unsigned int i;
1684         struct server_id self = messaging_server_id(fsp->conn->sconn->msg_ctx);
1685         struct byte_range_lock *br_lck = NULL;
1686
1687         if (fsp->op == NULL) {
1688                 return false;
1689         }
1690
1691         smblctx = fsp->op->global->open_persistent_id;
1692
1693         if (!fsp->op->global->durable) {
1694                 return false;
1695         }
1696
1697         /*
1698          * When reconnecting, we must not validate the brlock entries,
1699          * which would remove our own (disconnected) entries; instead we
1700          * reactivate them below.
1701          */
1702
1703         br_lck = brl_get_locks(talloc_tos(), fsp);
1704         if (br_lck == NULL) {
1705                 return false;
1706         }
1707
1708         if (br_lck->num_locks == 0) {
1709                 TALLOC_FREE(br_lck);
1710                 return true;
1711         }
1712
1713         for (i=0; i < br_lck->num_locks; i++) {
1714                 struct lock_struct *lock = &br_lck->lock_data[i];
1715
1716                 /*
1717                  * As this is a durable handle, we expect only locks
1718                  * belonging to the current file handle.
1719                  */
1720
1721                 if (lock->context.smblctx != smblctx) {
1722                         TALLOC_FREE(br_lck);
1723                         return false;
1724                 }
1725
1726                 if (lock->context.tid != TID_FIELD_INVALID) {
1727                         TALLOC_FREE(br_lck);
1728                         return false;
1729                 }
1730
1731                 if (!server_id_is_disconnected(&lock->context.pid)) {
1732                         TALLOC_FREE(br_lck);
1733                         return false;
1734                 }
1735
1736                 if (lock->fnum != FNUM_FIELD_INVALID) {
1737                         TALLOC_FREE(br_lck);
1738                         return false;
1739                 }
1740
1741                 lock->context.pid = self;
1742                 lock->context.tid = tid;
1743                 lock->fnum = fnum;
1744         }
1745
1746         fsp->current_lock_count = br_lck->num_locks;
1747         br_lck->modified = true;
1748         TALLOC_FREE(br_lck);
1749         return true;
1750 }
1751
1752 struct brl_forall_cb {
1753         void (*fn)(struct file_id id, struct server_id pid,
1754                    enum brl_type lock_type,
1755                    enum brl_flavour lock_flav,
1756                    br_off start, br_off size,
1757                    void *private_data);
1758         void *private_data;
1759 };
1760
1761 /****************************************************************************
1762  Traverse the whole database with this function, calling traverse_callback
1763  on each lock.
1764 ****************************************************************************/
1765
1766 static int brl_traverse_fn(struct db_record *rec, void *state)
1767 {
1768         struct brl_forall_cb *cb = (struct brl_forall_cb *)state;
1769         struct lock_struct *locks;
1770         struct file_id *key;
1771         unsigned int i;
1772         unsigned int num_locks = 0;
1773         TDB_DATA dbkey;
1774         TDB_DATA value;
1775
1776         dbkey = dbwrap_record_get_key(rec);
1777         value = dbwrap_record_get_value(rec);
1778
1779         /* In a traverse function we must not modify the record data
1780            in place, so work on a properly aligned copy. */
1781
1782         locks = (struct lock_struct *)talloc_memdup(
1783                 talloc_tos(), value.dptr, value.dsize);
1784         if (!locks) {
1785                 return -1; /* Terminate traversal. */
1786         }
1787
1788         key = (struct file_id *)dbkey.dptr;
1789         num_locks = value.dsize/sizeof(*locks);
1790
1791         if (cb->fn) {
1792                 for ( i=0; i<num_locks; i++) {
1793                         cb->fn(*key,
1794                                 locks[i].context.pid,
1795                                 locks[i].lock_type,
1796                                 locks[i].lock_flav,
1797                                 locks[i].start,
1798                                 locks[i].size,
1799                                 cb->private_data);
1800                 }
1801         }
1802
1803         TALLOC_FREE(locks);
1804         return 0;
1805 }
1806
1807 /*******************************************************************
1808  Call the specified function on each lock in the database.
1809 ********************************************************************/
1810
1811 int brl_forall(void (*fn)(struct file_id id, struct server_id pid,
1812                           enum brl_type lock_type,
1813                           enum brl_flavour lock_flav,
1814                           br_off start, br_off size,
1815                           void *private_data),
1816                void *private_data)
1817 {
1818         struct brl_forall_cb cb;
1819         NTSTATUS status;
1820         int count = 0;
1821
1822         if (!brlock_db) {
1823                 return 0;
1824         }
1825         cb.fn = fn;
1826         cb.private_data = private_data;
1827         status = dbwrap_traverse(brlock_db, brl_traverse_fn, &cb, &count);
1828
1829         if (!NT_STATUS_IS_OK(status)) {
1830                 return -1;
1831         } else {
1832                 return count;
1833         }
1834 }
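
     /*
      * A minimal usage sketch (not part of the original file): count every
      * byte range lock in the database. "count_locks_fn" is a hypothetical
      * callback name; any function matching brl_forall's fn signature works.
      *
      *   static void count_locks_fn(struct file_id id, struct server_id pid,
      *                              enum brl_type lock_type,
      *                              enum brl_flavour lock_flav,
      *                              br_off start, br_off size,
      *                              void *private_data)
      *   {
      *           unsigned int *count = (unsigned int *)private_data;
      *           *count += 1;
      *   }
      *
      *   unsigned int num = 0;
      *   if (brl_forall(count_locks_fn, &num) >= 0) {
      *           DEBUG(10, ("%u byte range locks\n", num));
      *   }
      */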
1835
1836 /*******************************************************************
1837  Store a potentially modified set of byte range lock data back into
1838  the database.
1839  Unlock the record.
1840 ********************************************************************/
1841
1842 static void byte_range_lock_flush(struct byte_range_lock *br_lck)
1843 {
1844         size_t data_len;
1845         unsigned i;
1846         struct lock_struct *locks = br_lck->lock_data;
1847
1848         if (!br_lck->modified) {
1849                 DEBUG(10, ("br_lck not modified\n"));
1850                 goto done;
1851         }
1852
1853         i = 0;
1854
1855         while (i < br_lck->num_locks) {
1856                 if (locks[i].context.pid.pid == 0) {
1857                         /*
1858                          * Autocleanup, the process conflicted and does not
1859                          * exist anymore.
1860                          */
1861                         locks[i] = locks[br_lck->num_locks-1];
1862                         br_lck->num_locks -= 1;
1863                 } else {
1864                         i += 1;
1865                 }
1866         }
1867
1868         data_len = br_lck->num_locks * sizeof(struct lock_struct);
1869
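             /*
              * On-disk format: num_locks lock_structs, optionally followed by
              * a single trailing byte set to 1 when have_read_oplocks is true.
              * Readers detect the flag via
              * dsize % sizeof(struct lock_struct) == 1.
              */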
1870         if (br_lck->have_read_oplocks) {
1871                 data_len += 1;
1872         }
1873
1874         DEBUG(10, ("data_len=%d\n", (int)data_len));
1875
1876         if (data_len == 0) {
1877                 /* No locks - delete this entry. */
1878                 NTSTATUS status = dbwrap_record_delete(br_lck->record);
1879                 if (!NT_STATUS_IS_OK(status)) {
1880                         DEBUG(0, ("delete_rec returned %s\n",
1881                                   nt_errstr(status)));
1882                         smb_panic("Could not delete byte range lock entry");
1883                 }
1884         } else {
1885                 TDB_DATA data;
1886                 NTSTATUS status;
1887
1888                 data.dsize = data_len;
1889                 data.dptr = talloc_array(talloc_tos(), uint8_t, data_len);
1890                 SMB_ASSERT(data.dptr != NULL);
1891
1892                 memcpy(data.dptr, br_lck->lock_data,
1893                        br_lck->num_locks * sizeof(struct lock_struct));
1894
1895                 if (br_lck->have_read_oplocks) {
1896                         data.dptr[data_len-1] = 1;
1897                 }
1898
1899                 status = dbwrap_record_store(br_lck->record, data, TDB_REPLACE);
1900                 TALLOC_FREE(data.dptr);
1901                 if (!NT_STATUS_IS_OK(status)) {
1902                         DEBUG(0, ("store returned %s\n", nt_errstr(status)));
1903                         smb_panic("Could not store byte range lock entry");
1904                 }
1905         }
1906
1907         DEBUG(10, ("seqnum=%d\n", dbwrap_get_seqnum(brlock_db)));
1908
1909  done:
1910         br_lck->modified = false;
1911         TALLOC_FREE(br_lck->record);
1912 }
1913
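     /****************************************************************************
      Destructor: flush any modified lock data back to the database and release
      the record lock when the byte_range_lock is freed.
     ****************************************************************************/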
1914 static int byte_range_lock_destructor(struct byte_range_lock *br_lck)
1915 {
1916         byte_range_lock_flush(br_lck);
1917         return 0;
1918 }
1919
1920 /*******************************************************************
1921  Fetch a set of byte range lock data from the database.
1922  Leave the record locked.
1923  TALLOC_FREE(brl) will release the lock in the destructor.
1924 ********************************************************************/
1925
1926 struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx, files_struct *fsp)
1927 {
1928         TDB_DATA key, data;
1929         struct byte_range_lock *br_lck = talloc(mem_ctx, struct byte_range_lock);
1930
1931         if (br_lck == NULL) {
1932                 return NULL;
1933         }
1934
1935         br_lck->fsp = fsp;
1936         br_lck->num_locks = 0;
1937         br_lck->have_read_oplocks = false;
1938         br_lck->modified = false;
1939
1940         key.dptr = (uint8_t *)&fsp->file_id;
1941         key.dsize = sizeof(struct file_id);
1942
1943         br_lck->record = dbwrap_fetch_locked(brlock_db, br_lck, key);
1944
1945         if (br_lck->record == NULL) {
1946                 DEBUG(3, ("Could not lock byte range lock entry\n"));
1947                 TALLOC_FREE(br_lck);
1948                 return NULL;
1949         }
1950
1951         data = dbwrap_record_get_value(br_lck->record);
1952
1953         br_lck->lock_data = NULL;
1954
1955         talloc_set_destructor(br_lck, byte_range_lock_destructor);
1956
1957         br_lck->num_locks = data.dsize / sizeof(struct lock_struct);
1958
1959         if (br_lck->num_locks != 0) {
1960                 br_lck->lock_data = talloc_array(
1961                         br_lck, struct lock_struct, br_lck->num_locks);
1962                 if (br_lck->lock_data == NULL) {
1963                         DEBUG(0, ("talloc_array failed\n"));
1964                         TALLOC_FREE(br_lck);
1965                         return NULL;
1966                 }
1967
1968                 memcpy(br_lck->lock_data, data.dptr,
1969                        talloc_get_size(br_lck->lock_data));
1970         }
1971
1972         DEBUG(10, ("data.dsize=%d\n", (int)data.dsize));
1973
1974         if ((data.dsize % sizeof(struct lock_struct)) == 1) {
1975                 br_lck->have_read_oplocks = (data.dptr[data.dsize-1] == 1);
1976         }
1977
1978         if (DEBUGLEVEL >= 10) {
1979                 unsigned int i;
1980                 struct lock_struct *locks = br_lck->lock_data;
1981                 DEBUG(10, ("brl_get_locks: %u current locks on file_id %s\n",
1982                            br_lck->num_locks,
1983                            file_id_string_tos(&fsp->file_id)));
1984                 for( i = 0; i < br_lck->num_locks; i++) {
1985                         print_lock_struct(i, &locks[i]);
1986                 }
1987         }
1988
1989         return br_lck;
1990 }
1991
1992 struct brl_get_locks_readonly_state {
1993         TALLOC_CTX *mem_ctx;
1994         struct byte_range_lock **br_lock;
1995 };
1996
1997 static void brl_get_locks_readonly_parser(TDB_DATA key, TDB_DATA data,
1998                                           void *private_data)
1999 {
2000         struct brl_get_locks_readonly_state *state =
2001                 (struct brl_get_locks_readonly_state *)private_data;
2002         struct byte_range_lock *br_lock;
2003
2004         br_lock = talloc_pooled_object(
2005                 state->mem_ctx, struct byte_range_lock, 1, data.dsize);
2006         if (br_lock == NULL) {
2007                 *state->br_lock = NULL;
2008                 return;
2009         }
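             /*
              * Note: talloc_pooled_object above reserved data.dsize bytes, so
              * this talloc_memdup should be satisfied from the pool and is
              * not expected to fail here.
              */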
2010         br_lock->lock_data = (struct lock_struct *)talloc_memdup(
2011                 br_lock, data.dptr, data.dsize);
2012         br_lock->num_locks = data.dsize / sizeof(struct lock_struct);
2013
2014         if ((data.dsize % sizeof(struct lock_struct)) == 1) {
2015                 br_lock->have_read_oplocks = (data.dptr[data.dsize-1] == 1);
2016         } else {
2017                 br_lock->have_read_oplocks = false;
2018         }
2019
2020         DEBUG(10, ("Got %d bytes, have_read_oplocks: %s\n", (int)data.dsize,
2021                    br_lock->have_read_oplocks ? "true" : "false"));
2022
2023         *state->br_lock = br_lock;
2024 }
2025
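     /****************************************************************************
      Fetch the current byte range lock data without locking the record. The
      result is cached on the fsp and invalidated via the database sequence
      number; in clustered mode the seqnum is unreliable, so the cache is
      bypassed there.
     ****************************************************************************/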
2026 struct byte_range_lock *brl_get_locks_readonly(files_struct *fsp)
2027 {
2028         struct byte_range_lock *br_lock = NULL;
2030
2031         DEBUG(10, ("seqnum=%d, fsp->brlock_seqnum=%d\n",
2032                    dbwrap_get_seqnum(brlock_db), fsp->brlock_seqnum));
2033
2034         if ((fsp->brlock_rec != NULL)
2035             && (dbwrap_get_seqnum(brlock_db) == fsp->brlock_seqnum)) {
2036                 /*
2037                  * We have cached the brlock_rec and the database did not
2038                  * change.
2039                  */
2040                 return fsp->brlock_rec;
2041         }
2042
2043         {
2060                 struct brl_get_locks_readonly_state state;
2061                 NTSTATUS status;
2062
2063                 /*
2064                  * Parse the record fresh from the database
2065                  */
2066
2067                 state.mem_ctx = fsp;
2068                 state.br_lock = &br_lock;
2069
2070                 status = dbwrap_parse_record(
2071                         brlock_db,
2072                         make_tdb_data((uint8_t *)&fsp->file_id,
2073                                       sizeof(fsp->file_id)),
2074                         brl_get_locks_readonly_parser, &state);
2075
2076                 if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
2077                         /*
2078                          * No locks on this file. Return an empty br_lock.
2079                          */
2080                         br_lock = talloc(fsp, struct byte_range_lock);
2081                         if (br_lock == NULL) {
2082                                 goto fail;
2083                         }
2084
2085                         br_lock->have_read_oplocks = false;
2086                         br_lock->num_locks = 0;
2087                         br_lock->lock_data = NULL;
2088
2089                 } else if (!NT_STATUS_IS_OK(status)) {
2090                         DEBUG(3, ("Could not parse byte range lock record: "
2091                                   "%s\n", nt_errstr(status)));
2092                         goto fail;
2093                 }
2094                 if (br_lock == NULL) {
2095                         goto fail;
2096                 }
2097         }
2098
2099         br_lock->fsp = fsp;
2100         br_lock->modified = false;
2101         br_lock->record = NULL;
2102
2103         if (lp_clustering()) {
2104                 /*
2105                  * In the cluster case we can't cache the brlock struct
2106                  * because dbwrap_get_seqnum does not work reliably over
2107                  * ctdb. Thus we have to throw away the brlock struct soon.
2108                  */
2109                 talloc_steal(talloc_tos(), br_lock);
2110         } else {
2111                 /*
2112                  * Cache the brlock struct, invalidated when the dbwrap_seqnum
2113                  * changes. See beginning of this routine.
2114                  */
2115                 TALLOC_FREE(fsp->brlock_rec);
2116                 fsp->brlock_rec = br_lock;
2117                 fsp->brlock_seqnum = dbwrap_get_seqnum(brlock_db);
2118         }
2119
2120 fail:
2122         return br_lock;
2123 }
2124
2125 struct brl_revalidate_state {
2126         ssize_t array_size;
2127         uint32_t num_pids;
2128         struct server_id *pids;
2129 };
2130
2131 /*
2132  * Collect PIDs of all processes with pending entries
2133  */
2134
2135 static void brl_revalidate_collect(struct file_id id, struct server_id pid,
2136                                    enum brl_type lock_type,
2137                                    enum brl_flavour lock_flav,
2138                                    br_off start, br_off size,
2139                                    void *private_data)
2140 {
2141         struct brl_revalidate_state *state =
2142                 (struct brl_revalidate_state *)private_data;
2143
2144         if (!IS_PENDING_LOCK(lock_type)) {
2145                 return;
2146         }
2147
2148         add_to_large_array(state, sizeof(pid), (void *)&pid,
2149                            &state->pids, &state->num_pids,
2150                            &state->array_size);
2151 }
2152
2153 /*
2154  * qsort callback to sort the processes
2155  */
2156
2157 static int compare_procids(const void *p1, const void *p2)
2158 {
2159         const struct server_id *i1 = (const struct server_id *)p1;
2160         const struct server_id *i2 = (const struct server_id *)p2;
2161
2162         if (i1->pid < i2->pid) return -1;
2163         if (i1->pid > i2->pid) return 1;
2164         return 0;
2165 }
2166
2167 /*
2168  * Send a MSG_SMB_UNLOCK message to all processes with pending byte range
2169  * locks so that they retry. Mainly used in the cluster code after a node has
2170  * died.
2171  *
2172  * Done in two steps to avoid double-sends: First we collect all entries in an
2173  * array, then qsort that array and only send to non-dupes.
2174  */
2175
2176 void brl_revalidate(struct messaging_context *msg_ctx,
2177                     void *private_data,
2178                     uint32_t msg_type,
2179                     struct server_id server_id,
2180                     DATA_BLOB *data)
2181 {
2182         struct brl_revalidate_state *state;
2183         uint32_t i;
2184         struct server_id last_pid;
2185
2186         if (!(state = talloc_zero(NULL, struct brl_revalidate_state))) {
2187                 DEBUG(0, ("talloc failed\n"));
2188                 return;
2189         }
2190
2191         brl_forall(brl_revalidate_collect, state);
2192
2193         if (state->array_size == -1) {
2194                 DEBUG(0, ("talloc failed\n"));
2195                 goto done;
2196         }
2197
2198         if (state->num_pids == 0) {
2199                 goto done;
2200         }
2201
2202         TYPESAFE_QSORT(state->pids, state->num_pids, compare_procids);
2203
2204         ZERO_STRUCT(last_pid);
2205
2206         for (i=0; i<state->num_pids; i++) {
2207                 if (serverid_equal(&last_pid, &state->pids[i])) {
2208                         /*
2209                          * We've seen that one already
2210                          */
2211                         continue;
2212                 }
2213
2214                 messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK,
2215                                &data_blob_null);
2216                 last_pid = state->pids[i];
2217         }
2218
2219  done:
2220         TALLOC_FREE(state);
2221         return;
2222 }
2223
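     /****************************************************************************
      Remove all brlock entries for a file if they belong to the given dead
      disconnected persistent open. Returns true if the record is already clean
      or was successfully removed, false if any entry is still in use.
     ****************************************************************************/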
2224 bool brl_cleanup_disconnected(struct file_id fid, uint64_t open_persistent_id)
2225 {
2226         bool ret = false;
2227         TALLOC_CTX *frame = talloc_stackframe();
2228         TDB_DATA key, val;
2229         struct db_record *rec;
2230         struct lock_struct *lock;
2231         unsigned n, num;
2232         NTSTATUS status;
2233
2234         key = make_tdb_data((void*)&fid, sizeof(fid));
2235
2236         rec = dbwrap_fetch_locked(brlock_db, frame, key);
2237         if (rec == NULL) {
2238                 DEBUG(5, ("brl_cleanup_disconnected: failed to fetch record "
2239                           "for file %s\n", file_id_string(frame, &fid)));
2240                 goto done;
2241         }
2242
2243         val = dbwrap_record_get_value(rec);
2244         lock = (struct lock_struct*)val.dptr;
2245         num = val.dsize / sizeof(struct lock_struct);
2246         if (lock == NULL) {
2247                 DEBUG(10, ("brl_cleanup_disconnected: no byte range locks for "
2248                            "file %s\n", file_id_string(frame, &fid)));
2249                 ret = true;
2250                 goto done;
2251         }
2252
2253         for (n=0; n<num; n++) {
2254                 struct lock_context *ctx = &lock[n].context;
2255
2256                 if (!server_id_is_disconnected(&ctx->pid)) {
2257                         DEBUG(5, ("brl_cleanup_disconnected: byte range lock "
2258                                   "%s used by server %s, do not cleanup\n",
2259                                   file_id_string(frame, &fid),
2260                                   server_id_str(frame, &ctx->pid)));
2261                         goto done;
2262                 }
2263
2264                 if (ctx->smblctx != open_persistent_id) {
2265                         DEBUG(5, ("brl_cleanup_disconnected: byte range lock "
2266                                   "%s expected smblctx %llu but found %llu"
2267                                   ", do not cleanup\n",
2268                                   file_id_string(frame, &fid),
2269                                   (unsigned long long)open_persistent_id,
2270                                   (unsigned long long)ctx->smblctx));
2271                         goto done;
2272                 }
2273         }
2274
2275         status = dbwrap_record_delete(rec);
2276         if (!NT_STATUS_IS_OK(status)) {
2277                 DEBUG(5, ("brl_cleanup_disconnected: failed to delete record "
2278                           "for file %s from %s, open %llu: %s\n",
2279                           file_id_string(frame, &fid), dbwrap_name(brlock_db),
2280                           (unsigned long long)open_persistent_id,
2281                           nt_errstr(status)));
2282                 goto done;
2283         }
2284
2285         DEBUG(10, ("brl_cleanup_disconnected: "
2286                    "file %s cleaned up %u entries from open %llu\n",
2287                    file_id_string(frame, &fid), num,
2288                    (unsigned long long)open_persistent_id));
2289
2290         ret = true;
2291 done:
2292         talloc_free(frame);
2293         return ret;
2294 }