Changes to allow Samba3 to pass the Samba4 RAW-READ tests.
[tprouty/samba.git] / source / locking / brlock.c
1 /* 
2    Unix SMB/CIFS implementation.
3    byte range locking code
4    Updated to handle range splits/merges.
5
6    Copyright (C) Andrew Tridgell 1992-2000
7    Copyright (C) Jeremy Allison 1992-2000
8    
9    This program is free software; you can redistribute it and/or modify
10    it under the terms of the GNU General Public License as published by
11    the Free Software Foundation; either version 2 of the License, or
12    (at your option) any later version.
13    
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License for more details.
18    
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 */
23
24 /* This module implements a tdb based byte range locking service,
25    replacing the fcntl() based byte range locking previously
26    used. This allows us to provide the same semantics as NT */
27
28 #include "includes.h"
29
/* Compile-time switch, currently off.  When non-zero it enables the
   alternative handling of 0/0 (start 0, size 0) locks: brl_conflict1(),
   lock_compare(), the sorted lock list in brl_lock() and the extra
   WRITE_LOCK pass in brl_unlock().  When zero, brl_lock() only logs a
   message on receiving such a lock. */
30 #define ZERO_ZERO 0
31
32 /* This contains elements that differentiate locks. The smbpid is a
33    client supplied pid, and is essentially the locking context for
34    this client */
35
36 struct lock_context {
37         uint16 smbpid; /* pid supplied by the client */
38         uint16 tid; /* compared with smbpid and pid in brl_same_context(); presumably the tree connection id - TODO confirm */
39         pid_t pid; /* process id of the lock owner; unlock messages are sent to this pid */
40 };
41
42 /* The data in brlock records is an unsorted linear array of these
43    records.  It is unnecessary to store the count as tdb provides the
44    size of the record */
45
46 struct lock_struct {
47         struct lock_context context; /* who owns the lock */
48         br_off start; /* first offset covered by the lock */
49         br_off size; /* length of the range; the conflict checks treat the end (start + size) as exclusive */
50         int fnum; /* file number the lock was requested on */
51         enum brl_type lock_type; /* READ_LOCK, WRITE_LOCK and PENDING_LOCK are the values used in this file */
52 };
53
54 /* The key used in the brlock database. */
55
/* One database record exists per (device, inode) pair; locking_key()
   zero fills this structure before setting the two fields. */
56 struct lock_key {
57         SMB_DEV_T device; /* device number of the file */
58         SMB_INO_T inode; /* inode number of the file */
59 };
60
61 /* The open brlock.tdb database. */
62
/* File-scope handle shared by every function below.  It starts out NULL
   and is assigned by brl_init(). */
63 static TDB_CONTEXT *tdb;
64
65 /****************************************************************************
66  Create a locking key - ensuring zero filled for pad purposes.
67 ****************************************************************************/
68
69 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
70 {
71         static struct lock_key key;
72         TDB_DATA kbuf;
73
74         memset(&key, '\0', sizeof(key));
75         key.device = dev;
76         key.inode = inode;
77         kbuf.dptr = (char *)&key;
78         kbuf.dsize = sizeof(key);
79         return kbuf;
80 }
81
82 /****************************************************************************
83  See if two locking contexts are equal.
84 ****************************************************************************/
85
86 static BOOL brl_same_context(struct lock_context *ctx1, 
87                              struct lock_context *ctx2)
88 {
89         return (ctx1->pid == ctx2->pid) &&
90                 (ctx1->smbpid == ctx2->smbpid) &&
91                 (ctx1->tid == ctx2->tid);
92 }
93
94 /****************************************************************************
95  See if lock2 can be added when lock1 is in place.
96 ****************************************************************************/
97
98 static BOOL brl_conflict(struct lock_struct *lck1, 
99                          struct lock_struct *lck2)
100 {
101         if (lck1->lock_type == PENDING_LOCK || lck2->lock_type == PENDING_LOCK )
102                 return False;
103
104         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
105                 return False;
106         }
107
108         if (brl_same_context(&lck1->context, &lck2->context) &&
109             lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
110                 return False;
111         }
112
113         if (lck1->start >= (lck2->start + lck2->size) ||
114             lck2->start >= (lck1->start + lck1->size)) {
115                 return False;
116         }
117             
118         return True;
119
120
#if ZERO_ZERO
/****************************************************************************
 Variant of brl_conflict() that is only built when ZERO_ZERO is enabled.

 It applies the same checks, plus one extra rule: a candidate 0/0 lock
 (start 0, size 0) conflicts with any existing lock of non-zero size.
 lck1 is the existing entry, lck2 the candidate.  Returns True on conflict.
****************************************************************************/

static BOOL brl_conflict1(struct lock_struct *lck1,
                         struct lock_struct *lck2)
{
        /* A pending entry on either side never counts as a conflict. */
        if (lck1->lock_type == PENDING_LOCK || lck2->lock_type == PENDING_LOCK )
                return False;

        /* Two read locks can always coexist. */
        if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
                return False;
        }

        /* Same context, same fnum, candidate is a read lock: allowed. */
        if (brl_same_context(&lck1->context, &lck2->context) &&
            lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) {
                return False;
        }

        /* The extra 0/0 rule: checked before the generic overlap test,
           which would otherwise let a zero length range at offset 0
           through. */
        if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) {
                return True;
        }

        /* No overlap between the two [start, start + size) ranges. */
        if (lck1->start >= (lck2->start + lck2->size) ||
            lck2->start >= (lck1->start + lck1->size)) {
                return False;
        }

        return True;
}
#endif
149
150 /****************************************************************************
151  Check to see if this lock conflicts, but ignore our own locks on the
152  same fnum only.
153 ****************************************************************************/
154
155 static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
156 {
157         if (lck1->lock_type == PENDING_LOCK || lck2->lock_type == PENDING_LOCK )
158                 return False;
159
160         if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
161                 return False;
162
163         /*
164          * Incoming WRITE locks conflict with existing READ locks even
165          * if the context is the same. JRA. See LOCKTEST7 in smbtorture.
166          */
167
168         if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) {
169                 if (brl_same_context(&lck1->context, &lck2->context) &&
170                                         lck1->fnum == lck2->fnum)
171                         return False;
172         }
173
174         if (lck1->start >= (lck2->start + lck2->size) ||
175             lck2->start >= (lck1->start + lck1->size))
176                 return False;
177             
178         return True;
179
180
181
182 #if DONT_DO_THIS
183         /* doing this traversal could kill solaris machines under high load (tridge) */
184         /* delete any dead locks */
185
186 /****************************************************************************
187  Delete a record if it is for a dead process, if check_self is true, then
188  delete any records belonging to this pid also (there shouldn't be any).
189 ****************************************************************************/
190
/*
 * Callback passed to tdb_traverse() by brl_init() and brl_shutdown(); it
 * is only compiled when DONT_DO_THIS is defined non-zero.
 *
 *   ttdb  - database handle supplied by the traversal; not used here, the
 *           file-level `tdb` handle is used instead.
 *   kbuf  - key of the record being visited.
 *   dbuf  - record data, read as an array of struct lock_struct.
 *   state - points at a BOOL: the caller's check_self flag.
 *
 * Always returns 0.
 */
191 static int delete_fn(TDB_CONTEXT *ttdb, TDB_DATA kbuf, TDB_DATA dbuf, void *state)
192 {
193         struct lock_struct *locks;
194         int count, i;
195         BOOL check_self = *(BOOL *)state;
196         pid_t mypid = sys_getpid();
197
198         tdb_chainlock(tdb, kbuf);
199
200         locks = (struct lock_struct *)dbuf.dptr;
201
202         count = dbuf.dsize / sizeof(*locks);
203         for (i=0; i<count; i++) {
204                 struct lock_struct *lock = &locks[i];
205
206                 /* If check_self is true we want to remove our own records. */
207                 if (check_self && (mypid == lock->context.pid)) {
208
209                         DEBUG(0,("brlock : delete_fn. LOGIC ERROR ! Shutting down and a record for my pid (%u) exists !\n",
210                                         (unsigned int)lock->context.pid ));
211
212                 } else if (process_exists(lock->context.pid)) {
213
                            /* Owner is still alive: keep this entry. */
214                         DEBUG(10,("brlock : delete_fn. pid %u exists.\n", (unsigned int)lock->context.pid ));
215                         continue;
216                 }
217
218                 DEBUG(10,("brlock : delete_fn. Deleting record for process %u\n",
219                                 (unsigned int)lock->context.pid ));
220
                    /* Close the gap by shifting the tail down one slot, then
                       step i back so the entry moved into slot i is examined
                       on the next iteration. */
221                 if (count > 1 && i < count-1) {
222                         memmove(&locks[i], &locks[i+1], 
223                                 sizeof(*locks)*((count-1) - i));
224                 }
225                 count--;
226                 i--;
227         }
228
            /* Nothing left: drop the record.  Otherwise write it back only
               if at least one entry was removed. */
229         if (count == 0) {
230                 tdb_delete(tdb, kbuf);
231         } else if (count < (dbuf.dsize / sizeof(*locks))) {
232                 dbuf.dsize = count * sizeof(*locks);
233                 tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
234         }
235
236         tdb_chainunlock(tdb, kbuf);
237         return 0;
238 }
239 #endif
240
241 /****************************************************************************
242  Open up the brlock.tdb database.
243 ****************************************************************************/
244
245 void brl_init(int read_only)
246 {
247         if (tdb)
248                 return;
249         tdb = tdb_open_log(lock_path("brlock.tdb"), 0,  TDB_DEFAULT|(read_only?0x0:TDB_CLEAR_IF_FIRST),
250                        read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
251         if (!tdb) {
252                 DEBUG(0,("Failed to open byte range locking database\n"));
253                 return;
254         }
255
256 #if DONT_DO_THIS
257         /* doing this traversal could kill solaris machines under high load (tridge) */
258         /* delete any dead locks */
259         if (!read_only) {
260                 BOOL check_self = False;
261                 tdb_traverse(tdb, delete_fn, &check_self);
262         }
263 #endif
264 }
265
266 /****************************************************************************
267  Close down the brlock.tdb database.
268 ****************************************************************************/
269
270 void brl_shutdown(int read_only)
271 {
272         if (!tdb)
273                 return;
274
275 #if DONT_DO_THIS
276         /* doing this traversal could kill solaris machines under high load (tridge) */
277         /* delete any dead locks */
278         if (!read_only) {
279                 BOOL check_self = True;
280                 tdb_traverse(tdb, delete_fn, &check_self);
281         }
282 #endif
283
284         tdb_close(tdb);
285 }
286
#if ZERO_ZERO
/****************************************************************************
 Compare two locks for sorting: order by start offset first, then by size.

 Returns a negative value, zero or a positive value when lck1 sorts
 before, equal to or after lck2.  The fields are compared directly rather
 than subtracted, because a br_off difference squeezed into an int can
 change sign or collapse to zero for large offsets and sizes.
****************************************************************************/
static int lock_compare(struct lock_struct *lck1,
                         struct lock_struct *lck2)
{
        if (lck1->start != lck2->start) {
                return (lck1->start < lck2->start) ? -1 : 1;
        }
        if (lck1->size != lck2->size) {
                return (lck1->size < lck2->size) ? -1 : 1;
        }
        return 0;
}
#endif
301
302 /****************************************************************************
303  Lock a range of bytes.
304 ****************************************************************************/
305
306 NTSTATUS brl_lock(SMB_DEV_T dev, SMB_INO_T ino, int fnum,
307                   uint16 smbpid, pid_t pid, uint16 tid,
308                   br_off start, br_off size, 
309                   enum brl_type lock_type, BOOL *my_lock_ctx)
310 {
311         TDB_DATA kbuf, dbuf;
312         int count, i;
313         struct lock_struct lock, *locks;
314         char *tp;
315         NTSTATUS status = NT_STATUS_OK;
316         static int last_failed = -1;
317         static br_off last_failed_start;
318
319         *my_lock_ctx = False;
320         kbuf = locking_key(dev,ino);
321
322         dbuf.dptr = NULL;
323
324 #if !ZERO_ZERO
325         if (start == 0 && size == 0) {
326                 DEBUG(0,("client sent 0/0 lock - please report this\n"));
327         }
328 #endif
329
330         tdb_chainlock(tdb, kbuf);
331         dbuf = tdb_fetch(tdb, kbuf);
332
333         lock.context.smbpid = smbpid;
334         lock.context.pid = pid;
335         lock.context.tid = tid;
336         lock.start = start;
337         lock.size = size;
338         lock.fnum = fnum;
339         lock.lock_type = lock_type;
340
341         if (dbuf.dptr) {
342                 /* there are existing locks - make sure they don't conflict */
343                 locks = (struct lock_struct *)dbuf.dptr;
344                 count = dbuf.dsize / sizeof(*locks);
345                 for (i=0; i<count; i++) {
346                         if (brl_conflict(&locks[i], &lock)) {
347                                 status = NT_STATUS_LOCK_NOT_GRANTED;
348                                 /* Did we block ourselves ? */
349                                 if (brl_same_context(&locks[i].context, &lock.context))
350                                         *my_lock_ctx = True;
351                                 goto fail;
352                         }
353 #if ZERO_ZERO
354                         if (lock.start == 0 && lock.size == 0 && 
355                             locks[i].size == 0) {
356                                 break;
357                         }
358 #endif
359                 }
360         }
361
362         /* no conflicts - add it to the list of locks */
363         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(*locks));
364         if (!tp) {
365                 status = NT_STATUS_NO_MEMORY;
366                 goto fail;
367         } else {
368                 dbuf.dptr = tp;
369         }
370         memcpy(dbuf.dptr + dbuf.dsize, &lock, sizeof(lock));
371         dbuf.dsize += sizeof(lock);
372
373 #if ZERO_ZERO
374         /* sort the lock list */
375         qsort(dbuf.dptr, dbuf.dsize/sizeof(lock), sizeof(lock), lock_compare);
376 #endif
377
378         tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
379
380         SAFE_FREE(dbuf.dptr);
381         tdb_chainunlock(tdb, kbuf);
382         return NT_STATUS_OK;
383
384  fail:
385         /* this is a nasty hack to try to simulate the lock result cache code in w2k.
386            It isn't completely accurate as I haven't yet worked out the correct
387            semantics (tridge)
388         */
389         if (last_failed == fnum &&
390             last_failed_start == start &&
391             NT_STATUS_EQUAL(status, NT_STATUS_LOCK_NOT_GRANTED)) {
392                 status = NT_STATUS_FILE_LOCK_CONFLICT;
393         }
394         last_failed = fnum;
395         last_failed_start = start;
396
397         SAFE_FREE(dbuf.dptr);
398         tdb_chainunlock(tdb, kbuf);
399         return status;
400 }
401
402 /****************************************************************************
403  Check if an unlock overlaps a pending lock.
404 ****************************************************************************/
405
406 static BOOL brl_pending_overlap(struct lock_struct *lock, struct lock_struct *pend_lock)
407 {
408         if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start))
409                 return True;
410         if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size))
411                 return True;
412         return False;
413 }
414
415 /****************************************************************************
416  Unlock a range of bytes.
417 ****************************************************************************/
418
/*
 * Remove one entry from the lock list of the file (dev, ino).
 *
 * The entry removed is the first one whose context (smbpid, pid, tid),
 * fnum, start and size all match the arguments exactly.
 *
 *   remove_pending_locks_only - when true, matching entries that are not
 *                               PENDING_LOCK are skipped.
 *   pre_unlock_fn             - if not NULL, called with pre_unlock_data
 *                               just before a non-pending entry is removed.
 *
 * When a non-pending entry is removed, MSG_SMB_UNLOCK is sent to the pid
 * of every pending entry whose range overlaps it.
 *
 * Returns True if an entry was removed, False if the file has no record
 * or no entry matched.  The record is chain-locked for the whole call.
 */
419 BOOL brl_unlock(SMB_DEV_T dev, SMB_INO_T ino, int fnum,
420                 uint16 smbpid, pid_t pid, uint16 tid,
421                 br_off start, br_off size,
422                 BOOL remove_pending_locks_only,
423                 void (*pre_unlock_fn)(void *),
424                 void *pre_unlock_data)
425 {
426         TDB_DATA kbuf, dbuf;
427         int count, i, j;
428         struct lock_struct *locks;
429         struct lock_context context;
430
431         kbuf = locking_key(dev,ino);
432
433         dbuf.dptr = NULL;
434
435         tdb_chainlock(tdb, kbuf);
436         dbuf = tdb_fetch(tdb, kbuf);
437
438         if (!dbuf.dptr) {
439                 DEBUG(10,("brl_unlock: tdb_fetch failed !\n"));
440                 goto fail;
441         }
442
443         context.smbpid = smbpid;
444         context.pid = pid;
445         context.tid = tid;
446
447         /* there are existing locks - find a match */
448         locks = (struct lock_struct *)dbuf.dptr;
449         count = dbuf.dsize / sizeof(*locks);
450
451 #if ZERO_ZERO
            /* Extra first pass, only built with ZERO_ZERO: look for a
               matching WRITE_LOCK before the general search below.  This
               pass ignores remove_pending_locks_only and sends no
               messages to pending waiters. */
452         for (i=0; i<count; i++) {
453                 struct lock_struct *lock = &locks[i];
454
455                 if (lock->lock_type == WRITE_LOCK &&
456                     brl_same_context(&lock->context, &context) &&
457                     lock->fnum == fnum &&
458                     lock->start == start &&
459                     lock->size == size) {
460
461                         if (pre_unlock_fn)
462                                 (*pre_unlock_fn)(pre_unlock_data);
463
464                         /* found it - delete it */
465                         if (count == 1) {
466                                 tdb_delete(tdb, kbuf);
467                         } else {
468                                 if (i < count-1) {
469                                         memmove(&locks[i], &locks[i+1], 
470                                                 sizeof(*locks)*((count-1) - i));
471                                 }
472                                 dbuf.dsize -= sizeof(*locks);
473                                 tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
474                         }
475
476                         SAFE_FREE(dbuf.dptr);
477                         tdb_chainunlock(tdb, kbuf);
478                         return True;
479                 }
480         }
481 #endif
482
483         locks = (struct lock_struct *)dbuf.dptr;
484         count = dbuf.dsize / sizeof(*locks);
485         for (i=0; i<count; i++) {
486                 struct lock_struct *lock = &locks[i];
487
488                 if (brl_same_context(&lock->context, &context) &&
489                                 lock->fnum == fnum &&
490                                 lock->start == start &&
491                                 lock->size == size) {
492
                            /* Caller only wants pending entries removed:
                               leave a real lock in place and keep looking. */
493                         if (remove_pending_locks_only && lock->lock_type != PENDING_LOCK)
494                                 continue;
495
496                         if (lock->lock_type != PENDING_LOCK) {
497
498                                 /* Do any POSIX unlocks needed. */
499                                 if (pre_unlock_fn)
500                                         (*pre_unlock_fn)(pre_unlock_data);
501
502                                 /* Send unlock messages to any pending waiters that overlap. */
503                                 for (j=0; j<count; j++) {
504                                         struct lock_struct *pend_lock = &locks[j];
505
506                                         /* Ignore non-pending locks. */
507                                         if (pend_lock->lock_type != PENDING_LOCK)
508                                                 continue;
509
510                                         /* We could send specific lock info here... */
511                                         if (brl_pending_overlap(lock, pend_lock)) {
512                                                 DEBUG(10,("brl_unlock: sending unlock message to pid %u\n",
513                                                                         (unsigned int)pend_lock->context.pid ));
514
515                                                 message_send_pid(pend_lock->context.pid,
516                                                                 MSG_SMB_UNLOCK,
517                                                                 NULL, 0, True);
518                                         }
519                                 }
520                         }
521
522                         /* found it - delete it */
                            /* Last entry: drop the whole record.  Otherwise
                               shift the tail down over slot i and store the
                               shortened array. */
523                         if (count == 1) {
524                                 tdb_delete(tdb, kbuf);
525                         } else {
526                                 if (i < count-1) {
527                                         memmove(&locks[i], &locks[i+1], 
528                                                 sizeof(*locks)*((count-1) - i));
529                                 }
530                                 dbuf.dsize -= sizeof(*locks);
531                                 tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
532                         }
533
534                         SAFE_FREE(dbuf.dptr);
535                         tdb_chainunlock(tdb, kbuf);
536                         return True;
537                 }
538         }
539
540         /* we didn't find it */
541
542  fail:
543         SAFE_FREE(dbuf.dptr);
544         tdb_chainunlock(tdb, kbuf);
545         return False;
546 }
547
548
549 /****************************************************************************
550  Test if we could add a lock if we wanted to.
551 ****************************************************************************/
552
553 BOOL brl_locktest(SMB_DEV_T dev, SMB_INO_T ino, int fnum,
554                   uint16 smbpid, pid_t pid, uint16 tid,
555                   br_off start, br_off size, 
556                   enum brl_type lock_type, int check_self)
557 {
558         TDB_DATA kbuf, dbuf;
559         int count, i;
560         struct lock_struct lock, *locks;
561
562         kbuf = locking_key(dev,ino);
563
564         dbuf.dptr = NULL;
565
566         tdb_chainlock(tdb, kbuf);
567         dbuf = tdb_fetch(tdb, kbuf);
568
569         lock.context.smbpid = smbpid;
570         lock.context.pid = pid;
571         lock.context.tid = tid;
572         lock.start = start;
573         lock.size = size;
574         lock.fnum = fnum;
575         lock.lock_type = lock_type;
576
577         if (dbuf.dptr) {
578                 /* there are existing locks - make sure they don't conflict */
579                 locks = (struct lock_struct *)dbuf.dptr;
580                 count = dbuf.dsize / sizeof(*locks);
581                 for (i=0; i<count; i++) {
582                         if (check_self) {
583                                 if (brl_conflict(&locks[i], &lock))
584                                         goto fail;
585                         } else {
586                                 /*
587                                  * Our own locks don't conflict.
588                                  */
589                                 if (brl_conflict_other(&locks[i], &lock))
590                                         goto fail;
591                         }
592                 }
593         }
594
595         /* no conflicts - we could have added it */
596         SAFE_FREE(dbuf.dptr);
597         tdb_chainunlock(tdb, kbuf);
598         return True;
599
600  fail:
601         SAFE_FREE(dbuf.dptr);
602         tdb_chainunlock(tdb, kbuf);
603         return False;
604 }
605
606 /****************************************************************************
607  Remove any locks associated with a open file.
608 ****************************************************************************/
609
/*
 * Remove every entry in the lock list of the file (dev, ino) whose tid,
 * pid and fnum match the arguments.  smbpid is not compared.
 *
 * For each entry removed, MSG_SMB_UNLOCK is sent to the pid of every
 * pending entry that overlaps it and belongs to a different
 * tid/pid/fnum.
 *
 * The record is deleted when no entries remain, rewritten when at least
 * one was removed, and left alone otherwise.  Does nothing if the file
 * has no record.  The record is chain-locked for the whole call.
 */
610 void brl_close(SMB_DEV_T dev, SMB_INO_T ino, pid_t pid, int tid, int fnum)
611 {
612         TDB_DATA kbuf, dbuf;
613         int count, i, j, dcount=0;
614         struct lock_struct *locks;
615
616         kbuf = locking_key(dev,ino);
617
618         dbuf.dptr = NULL;
619
620         tdb_chainlock(tdb, kbuf);
621         dbuf = tdb_fetch(tdb, kbuf);
622
623         if (!dbuf.dptr) goto fail;
624
625         /* there are existing locks - remove any for this fnum */
626         locks = (struct lock_struct *)dbuf.dptr;
627         count = dbuf.dsize / sizeof(*locks);
628
629         for (i=0; i<count; i++) {
630                 struct lock_struct *lock = &locks[i];
631
632                 if (lock->context.tid == tid &&
633                     lock->context.pid == pid &&
634                     lock->fnum == fnum) {
635
636                         /* Send unlock messages to any pending waiters that overlap. */
637                         for (j=0; j<count; j++) {
638                                 struct lock_struct *pend_lock = &locks[j];
639
640                                 /* Ignore our own or non-pending locks. */
641                                 if (pend_lock->lock_type != PENDING_LOCK)
642                                         continue;
643
                                    /* Pending entries of the file being closed
                                       need no message: the outer loop removes
                                       them as well. */
644                                 if (pend_lock->context.tid == tid &&
645                                     pend_lock->context.pid == pid &&
646                                     pend_lock->fnum == fnum)
647                                         continue;
648
649                                 /* We could send specific lock info here... */
650                                 if (brl_pending_overlap(lock, pend_lock))
651                                         message_send_pid(pend_lock->context.pid,
652                                                         MSG_SMB_UNLOCK,
653                                                         NULL, 0, True);
654                         }
655
656                         /* found it - delete it */
                            /* Shift the tail down over slot i, then step i
                               back so the entry moved into slot i is
                               examined on the next iteration.  dcount counts
                               the removed entries. */
657                         if (count > 1 && i < count-1) {
658                                 memmove(&locks[i], &locks[i+1], 
659                                         sizeof(*locks)*((count-1) - i));
660                         }
661                         count--;
662                         i--;
663                         dcount++;
664                 }
665         }
666
            /* Nothing left: drop the record.  Otherwise write it back only
               if at least one entry was removed. */
667         if (count == 0) {
668                 tdb_delete(tdb, kbuf);
669         } else if (count < (dbuf.dsize / sizeof(*locks))) {
670                 dbuf.dsize -= dcount * sizeof(*locks);
671                 tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
672         }
673
674         /* common exit - reached whether or not anything was removed */
675  fail:
676         SAFE_FREE(dbuf.dptr);
677         tdb_chainunlock(tdb, kbuf);
678 }
679
680 /****************************************************************************
681  Traverse the whole database with this function, calling traverse_callback
682  on each lock.
683 ****************************************************************************/
684
685 static int traverse_fn(TDB_CONTEXT *ttdb, TDB_DATA kbuf, TDB_DATA dbuf, void *state)
686 {
687         struct lock_struct *locks;
688         struct lock_key *key;
689         int i;
690
691         BRLOCK_FN(traverse_callback) = (BRLOCK_FN_CAST())state;
692
693         locks = (struct lock_struct *)dbuf.dptr;
694         key = (struct lock_key *)kbuf.dptr;
695
696         for (i=0;i<dbuf.dsize/sizeof(*locks);i++) {
697                 traverse_callback(key->device, key->inode,
698                                   locks[i].context.pid,
699                                   locks[i].lock_type,
700                                   locks[i].start,
701                                   locks[i].size);
702         }
703         return 0;
704 }
705
706 /*******************************************************************
707  Call the specified function on each lock in the database.
708 ********************************************************************/
709
710 int brl_forall(BRLOCK_FN(fn))
711 {
712         if (!tdb) return 0;
713         return tdb_traverse(tdb, traverse_fn, (void *)fn);
714 }