/*
   Unix SMB/CIFS implementation.
   byte range locking code
   Updated to handle range splits/merges.

   Copyright (C) Andrew Tridgell 1992-2000
   Copyright (C) Jeremy Allison 1992-2000

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

/* This module implements a tdb based byte range locking service,
   replacing the fcntl() based byte range locking previously
   used. This allows us to provide the same semantics as NT */
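
/* Illustrative sketch of the typical call sequence from the SMB layer
   (assumption: the real call sites live in locking/locking.c; the local
   variable names below are hypothetical):

       brl_init(False);                             open brlock.tdb
       status = brl_lock(dev, ino, fnum, smbpid, pid, tid,
                         start, size, WRITE_LOCK);  grant or refuse the range
       ok = brl_unlock(dev, ino, fnum, smbpid, pid, tid,
                       start, size, False);         release it again
       brl_close(dev, ino, pid, tid, fnum);         drop this fnum's locks */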

#include "includes.h"

#define ZERO_ZERO 0

/* This contains elements that differentiate locks. The smbpid is a
   client supplied pid, and is essentially the locking context for
   this client */

struct lock_context {
        uint16 smbpid;
        uint16 tid;
        pid_t pid;
};

/* The data in brlock records is an unsorted linear array of these
   records.  It is unnecessary to store the count as tdb provides the
   size of the record */
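
/* For example, the number of locks in a fetched record is recovered as
   dbuf.dsize / sizeof(struct lock_struct), as done throughout this file. */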

struct lock_struct {
        struct lock_context context;
        br_off start;
        br_off size;
        int fnum;
        enum brl_type lock_type;
};

/* The key used in the brlock database. */

struct lock_key {
        SMB_DEV_T device;
        SMB_INO_T inode;
};

/* The open brlock.tdb database. */

static TDB_CONTEXT *tdb;

/****************************************************************************
 Create a locking key - ensuring zero filled for pad purposes.
****************************************************************************/

static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
{
        static struct lock_key key;
        TDB_DATA kbuf;

        memset(&key, '\0', sizeof(key));
        key.device = dev;
        key.inode = inode;
        kbuf.dptr = (char *)&key;
        kbuf.dsize = sizeof(key);
        return kbuf;
}

/****************************************************************************
 See if two locking contexts are equal.
****************************************************************************/

static BOOL brl_same_context(struct lock_context *ctx1,
                             struct lock_context *ctx2)
{
        return (ctx1->pid == ctx2->pid) &&
                (ctx1->smbpid == ctx2->smbpid) &&
                (ctx1->tid == ctx2->tid);
}

/****************************************************************************
 See if lock2 can be added when lock1 is in place.
****************************************************************************/

static BOOL brl_conflict(struct lock_struct *lck1,
                         struct lock_struct *lck2)
{
        if (lck1->lock_type == PENDING_LOCK || lck2->lock_type == PENDING_LOCK)
                return False;

        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
        }

        if (brl_same_context(&lck1->context, &lck2->context) &&
            lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
                return False;
        }

        if (lck1->start >= (lck2->start + lck2->size) ||
            lck2->start >= (lck1->start + lck1->size)) {
                return False;
        }

        return True;
}

#if ZERO_ZERO
static BOOL brl_conflict1(struct lock_struct *lck1,
                          struct lock_struct *lck2)
{
        if (lck1->lock_type == PENDING_LOCK || lck2->lock_type == PENDING_LOCK)
                return False;

        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
        }

        if (brl_same_context(&lck1->context, &lck2->context) &&
            lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
                return False;
        }

        if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
                return True;
        }

        if (lck1->start >= (lck2->start + lck2->size) ||
            lck2->start >= (lck1->start + lck1->size)) {
                return False;
        }
        return True;
}
#endif

/****************************************************************************
 Check to see if this lock conflicts, but ignore our own locks on the
 same fnum only.
****************************************************************************/

static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
{
        if (lck1->lock_type == PENDING_LOCK || lck2->lock_type == PENDING_LOCK)
                return False;

        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
                return False;

        /*
         * Incoming WRITE locks conflict with existing READ locks even
         * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
         */

        if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
                if (brl_same_context(&lck1->context, &lck2->context) &&
                                        lck1->fnum == lck2->fnum)
                        return False;
        }

        if (lck1->start >= (lck2->start + lck2->size) ||
            lck2->start >= (lck1->start + lck1->size)) return False;
        return True;
}


#if DONT_DO_THIS
        /* doing this traversal could kill solaris machines under high load (tridge) */
        /* delete any dead locks */

/****************************************************************************
 Delete a record if it is for a dead process. If check_self is true, then
 also delete any records belonging to this pid (there shouldn't be any).
****************************************************************************/

static int delete_fn(TDB_CONTEXT *ttdb, TDB_DATA kbuf, TDB_DATA dbuf, void *state)
{
        struct lock_struct *locks;
        int count, i;
        BOOL check_self = *(BOOL *)state;
        pid_t mypid = sys_getpid();

        tdb_chainlock(tdb, kbuf);

        locks = (struct lock_struct *)dbuf.dptr;

        count = dbuf.dsize / sizeof(*locks);
        for (i=0; i<count; i++) {
                struct lock_struct *lock = &locks[i];

                /* If check_self is true we want to remove our own records. */
                if (check_self && (mypid == lock->context.pid)) {

                        DEBUG(0,("brlock : delete_fn. LOGIC ERROR ! Shutting down and a record for my pid (%u) exists !\n",
                                        (unsigned int)lock->context.pid ));

                } else if (process_exists(lock->context.pid)) {

                        DEBUG(10,("brlock : delete_fn. pid %u exists.\n", (unsigned int)lock->context.pid ));
                        continue;
                }

                DEBUG(10,("brlock : delete_fn. Deleting record for process %u\n",
                                (unsigned int)lock->context.pid ));

                if (count > 1 && i < count-1) {
                        memmove(&locks[i], &locks[i+1],
                                sizeof(*locks)*((count-1) - i));
                }
                count--;
                i--;
        }

        if (count == 0) {
                tdb_delete(tdb, kbuf);
        } else if (count < (dbuf.dsize / sizeof(*locks))) {
                dbuf.dsize = count * sizeof(*locks);
                tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
        }

        tdb_chainunlock(tdb, kbuf);
        return 0;
}
#endif

/****************************************************************************
 Open up the brlock.tdb database.
****************************************************************************/

void brl_init(int read_only)
{
        if (tdb)
                return;
        tdb = tdb_open_log(lock_path("brlock.tdb"), 0, TDB_DEFAULT|(read_only?0x0:TDB_CLEAR_IF_FIRST),
                       read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
        if (!tdb) {
                DEBUG(0,("Failed to open byte range locking database\n"));
                return;
        }

#if DONT_DO_THIS
        /* doing this traversal could kill solaris machines under high load (tridge) */
        /* delete any dead locks */
        if (!read_only) {
                BOOL check_self = False;
                tdb_traverse(tdb, delete_fn, &check_self);
        }
#endif
}

/****************************************************************************
 Close down the brlock.tdb database.
****************************************************************************/

void brl_shutdown(int read_only)
{
        if (!tdb)
                return;

#if DONT_DO_THIS
        /* doing this traversal could kill solaris machines under high load (tridge) */
        /* delete any dead locks */
        if (!read_only) {
                BOOL check_self = True;
                tdb_traverse(tdb, delete_fn, &check_self);
        }
#endif

        tdb_close(tdb);
}

#if ZERO_ZERO
/****************************************************************************
 Compare two locks for sorting.
****************************************************************************/
static int lock_compare(struct lock_struct *lck1,
                        struct lock_struct *lck2)
{
        if (lck1->start != lck2->start) return (lck1->start - lck2->start);
        if (lck2->size != lck1->size) {
                return ((int)lck1->size - (int)lck2->size);
        }
        return 0;
}
#endif

/****************************************************************************
 Lock a range of bytes.
****************************************************************************/
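/* Returns NT_STATUS_OK on success, NT_STATUS_LOCK_NOT_GRANTED on a conflict
   (upgraded to NT_STATUS_FILE_LOCK_CONFLICT when the same fnum/start fails
   again in a row - see the fail: path below), or NT_STATUS_NO_MEMORY. */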

NTSTATUS brl_lock(SMB_DEV_T dev, SMB_INO_T ino, int fnum,
                  uint16 smbpid, pid_t pid, uint16 tid,
                  br_off start, br_off size,
                  enum brl_type lock_type)
{
        TDB_DATA kbuf, dbuf;
        int count, i;
        struct lock_struct lock, *locks;
        char *tp;
        NTSTATUS status = NT_STATUS_OK;
        static int last_failed = -1;
        static br_off last_failed_start;

        kbuf = locking_key(dev,ino);

        dbuf.dptr = NULL;

#if !ZERO_ZERO
        if (start == 0 && size == 0) {
                DEBUG(0,("client sent 0/0 lock - please report this\n"));
        }
#endif

        tdb_chainlock(tdb, kbuf);
        dbuf = tdb_fetch(tdb, kbuf);

        lock.context.smbpid = smbpid;
        lock.context.pid = pid;
        lock.context.tid = tid;
        lock.start = start;
        lock.size = size;
        lock.fnum = fnum;
        lock.lock_type = lock_type;

        if (dbuf.dptr) {
                /* there are existing locks - make sure they don't conflict */
                locks = (struct lock_struct *)dbuf.dptr;
                count = dbuf.dsize / sizeof(*locks);
                for (i=0; i<count; i++) {
                        if (brl_conflict(&locks[i], &lock)) {
                                status = NT_STATUS_LOCK_NOT_GRANTED;
                                goto fail;
                        }
#if ZERO_ZERO
                        if (lock.start == 0 && lock.size == 0 &&
                            locks[i].size == 0) {
                                break;
                        }
#endif
                }
        }

        /* no conflicts - add it to the list of locks */
        tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(*locks));
        if (!tp) {
                status = NT_STATUS_NO_MEMORY;
                goto fail;
        } else {
                dbuf.dptr = tp;
        }
        memcpy(dbuf.dptr + dbuf.dsize, &lock, sizeof(lock));
        dbuf.dsize += sizeof(lock);

#if ZERO_ZERO
        /* sort the lock list */
        qsort(dbuf.dptr, dbuf.dsize/sizeof(lock), sizeof(lock), lock_compare);
#endif

        tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);

        SAFE_FREE(dbuf.dptr);
        tdb_chainunlock(tdb, kbuf);
        return NT_STATUS_OK;

 fail:
        /* this is a nasty hack to try to simulate the lock result cache code in w2k.
           It isn't completely accurate as I haven't yet worked out the correct
           semantics (tridge)
        */
        if (last_failed == fnum &&
            last_failed_start == start &&
            NT_STATUS_EQUAL(status, NT_STATUS_LOCK_NOT_GRANTED)) {
                status = NT_STATUS_FILE_LOCK_CONFLICT;
        }
        last_failed = fnum;
        last_failed_start = start;

        SAFE_FREE(dbuf.dptr);
        tdb_chainunlock(tdb, kbuf);
        return status;
}

/****************************************************************************
 Check if an unlock overlaps a pending lock.
****************************************************************************/
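/* For example, an unlock of [100, 120) overlaps a pending lock starting at
   110 but not one starting at 130. Note the second test uses <=, so a pending
   lock that ends exactly where the unlock starts also counts as overlapping. */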

static BOOL brl_pending_overlap(struct lock_struct *lock, struct lock_struct *pend_lock)
{
        if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
                return True;
        if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
                return True;
        return False;
}

/****************************************************************************
 Unlock a range of bytes.
****************************************************************************/

BOOL brl_unlock(SMB_DEV_T dev, SMB_INO_T ino, int fnum,
                uint16 smbpid, pid_t pid, uint16 tid,
                br_off start, br_off size,
                BOOL remove_pending_locks_only)
{
        TDB_DATA kbuf, dbuf;
        int count, i, j;
        struct lock_struct *locks;
        struct lock_context context;

        kbuf = locking_key(dev,ino);

        dbuf.dptr = NULL;

        tdb_chainlock(tdb, kbuf);
        dbuf = tdb_fetch(tdb, kbuf);

        if (!dbuf.dptr) {
                DEBUG(10,("brl_unlock: tdb_fetch failed !\n"));
                goto fail;
        }

        context.smbpid = smbpid;
        context.pid = pid;
        context.tid = tid;

        /* there are existing locks - find a match */
        locks = (struct lock_struct *)dbuf.dptr;
        count = dbuf.dsize / sizeof(*locks);

#if ZERO_ZERO
        for (i=0; i<count; i++) {
                struct lock_struct *lock = &locks[i];

                if (lock->lock_type == WRITE_LOCK &&
                    brl_same_context(&lock->context, &context) &&
                    lock->fnum == fnum &&
                    lock->start == start &&
                    lock->size == size) {
                        /* found it - delete it */
                        if (count == 1) {
                                tdb_delete(tdb, kbuf);
                        } else {
                                if (i < count-1) {
                                        memmove(&locks[i], &locks[i+1],
                                                sizeof(*locks)*((count-1) - i));
                                }
                                dbuf.dsize -= sizeof(*locks);
                                tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
                        }

                        SAFE_FREE(dbuf.dptr);
                        tdb_chainunlock(tdb, kbuf);
                        return True;
                }
        }
#endif

        locks = (struct lock_struct *)dbuf.dptr;
        count = dbuf.dsize / sizeof(*locks);
        for (i=0; i<count; i++) {
                struct lock_struct *lock = &locks[i];

                if (brl_same_context(&lock->context, &context) &&
                                lock->fnum == fnum &&
                                lock->start == start &&
                                lock->size == size) {

                        if (remove_pending_locks_only && lock->lock_type != PENDING_LOCK)
                                continue;

                        if (lock->lock_type != PENDING_LOCK) {
                                /* Send unlock messages to any pending waiters that overlap. */
                                for (j=0; j<count; j++) {
                                        struct lock_struct *pend_lock = &locks[j];

                                        /* Ignore non-pending locks. */
                                        if (pend_lock->lock_type != PENDING_LOCK)
                                                continue;

                                        /* We could send specific lock info here... */
                                        if (brl_pending_overlap(lock, pend_lock)) {
                                                DEBUG(10,("brl_unlock: sending unlock message to pid %u\n",
                                                                        (unsigned int)pend_lock->context.pid ));

                                                message_send_pid(pend_lock->context.pid,
                                                                MSG_SMB_UNLOCK,
                                                                NULL, 0, True);
                                        }
                                }
                        }

                        /* found it - delete it */
                        if (count == 1) {
                                tdb_delete(tdb, kbuf);
                        } else {
                                if (i < count-1) {
                                        memmove(&locks[i], &locks[i+1],
                                                sizeof(*locks)*((count-1) - i));
                                }
                                dbuf.dsize -= sizeof(*locks);
                                tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
                        }

                        SAFE_FREE(dbuf.dptr);
                        tdb_chainunlock(tdb, kbuf);
                        return True;
                }
        }

        /* we didn't find it */

 fail:
        SAFE_FREE(dbuf.dptr);
        tdb_chainunlock(tdb, kbuf);
        return False;
}


/****************************************************************************
 Test if we could add a lock if we wanted to.
****************************************************************************/
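/* If check_self is true, existing locks are tested with brl_conflict (the
   same rules used when actually locking); otherwise brl_conflict_other is
   used, which ignores locks held by this context on the same fnum (except
   the WRITE-over-READ case it documents). */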

BOOL brl_locktest(SMB_DEV_T dev, SMB_INO_T ino, int fnum,
                  uint16 smbpid, pid_t pid, uint16 tid,
                  br_off start, br_off size,
                  enum brl_type lock_type, int check_self)
{
        TDB_DATA kbuf, dbuf;
        int count, i;
        struct lock_struct lock, *locks;

        kbuf = locking_key(dev,ino);

        dbuf.dptr = NULL;

        tdb_chainlock(tdb, kbuf);
        dbuf = tdb_fetch(tdb, kbuf);

        lock.context.smbpid = smbpid;
        lock.context.pid = pid;
        lock.context.tid = tid;
        lock.start = start;
        lock.size = size;
        lock.fnum = fnum;
        lock.lock_type = lock_type;

        if (dbuf.dptr) {
                /* there are existing locks - make sure they don't conflict */
                locks = (struct lock_struct *)dbuf.dptr;
                count = dbuf.dsize / sizeof(*locks);
                for (i=0; i<count; i++) {
                        if (check_self) {
                                if (brl_conflict(&locks[i], &lock))
                                        goto fail;
                        } else {
                                /*
                                 * Our own locks don't conflict.
                                 */
                                if (brl_conflict_other(&locks[i], &lock))
                                        goto fail;
                        }
                }
        }

        /* no conflicts - we could have added it */
        SAFE_FREE(dbuf.dptr);
        tdb_chainunlock(tdb, kbuf);
        return True;

 fail:
        SAFE_FREE(dbuf.dptr);
        tdb_chainunlock(tdb, kbuf);
        return False;
}

/****************************************************************************
 Remove any locks associated with an open file.
****************************************************************************/
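/* Pending (blocked) waiters on other file handles whose ranges overlap the
   removed locks are woken with a MSG_SMB_UNLOCK message so they can retry. */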

void brl_close(SMB_DEV_T dev, SMB_INO_T ino, pid_t pid, int tid, int fnum)
{
        TDB_DATA kbuf, dbuf;
        int count, i, j, dcount=0;
        struct lock_struct *locks;

        kbuf = locking_key(dev,ino);

        dbuf.dptr = NULL;

        tdb_chainlock(tdb, kbuf);
        dbuf = tdb_fetch(tdb, kbuf);

        if (!dbuf.dptr) goto fail;

        /* there are existing locks - remove any for this fnum */
        locks = (struct lock_struct *)dbuf.dptr;
        count = dbuf.dsize / sizeof(*locks);

        for (i=0; i<count; i++) {
                struct lock_struct *lock = &locks[i];

                if (lock->context.tid == tid &&
                    lock->context.pid == pid &&
                    lock->fnum == fnum) {

                        /* Send unlock messages to any pending waiters that overlap. */
                        for (j=0; j<count; j++) {
                                struct lock_struct *pend_lock = &locks[j];

                                /* Ignore our own or non-pending locks. */
                                if (pend_lock->lock_type != PENDING_LOCK)
                                        continue;

                                if (pend_lock->context.tid == tid &&
                                    pend_lock->context.pid == pid &&
                                    pend_lock->fnum == fnum)
                                        continue;

                                /* We could send specific lock info here... */
                                if (brl_pending_overlap(lock, pend_lock))
                                        message_send_pid(pend_lock->context.pid,
                                                        MSG_SMB_UNLOCK,
                                                        NULL, 0, True);
                        }

                        /* found it - delete it */
                        if (count > 1 && i < count-1) {
                                memmove(&locks[i], &locks[i+1],
                                        sizeof(*locks)*((count-1) - i));
                        }
                        count--;
                        i--;
                        dcount++;
                }
        }

        if (count == 0) {
                tdb_delete(tdb, kbuf);
        } else if (count < (dbuf.dsize / sizeof(*locks))) {
                dbuf.dsize -= dcount * sizeof(*locks);
                tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
        }

        /* we didn't find it */
 fail:
        SAFE_FREE(dbuf.dptr);
        tdb_chainunlock(tdb, kbuf);
}

/****************************************************************************
 Traverse the whole database with this function, calling traverse_callback
 on each lock.
****************************************************************************/

static int traverse_fn(TDB_CONTEXT *ttdb, TDB_DATA kbuf, TDB_DATA dbuf, void *state)
{
        struct lock_struct *locks;
        struct lock_key *key;
        int i;

        BRLOCK_FN(traverse_callback) = (BRLOCK_FN_CAST())state;

        locks = (struct lock_struct *)dbuf.dptr;
        key = (struct lock_key *)kbuf.dptr;

        for (i=0;i<dbuf.dsize/sizeof(*locks);i++) {
                traverse_callback(key->device, key->inode,
                                  locks[i].context.pid,
                                  locks[i].lock_type,
                                  locks[i].start,
                                  locks[i].size);
        }
        return 0;
}

/*******************************************************************
 Call the specified function on each lock in the database.
********************************************************************/

int brl_forall(BRLOCK_FN(fn))
{
        if (!tdb) return 0;
        return tdb_traverse(tdb, traverse_fn, (void *)fn);
}