Split off of POSIX locking into separate unit as Andrew requested.
[ira/wip.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/Netbios implementation.
3    Version 3.0
4    Locking functions
5    Copyright (C) Jeremy Allison 1992-2000
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
21    Revision History:
22
23    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 */
25
26 #include "includes.h"
27 extern int DEBUGLEVEL;
28 extern int global_smbpid;
29
30 /*
31  * The POSIX locking database handle.
32  */
33
34 static TDB_CONTEXT *posix_lock_tdb;
35
36 /*
37  * The pending close database handle.
38  */
39
40 static TDB_CONTEXT *posix_pending_close_tdb;
41
42 /*
43  * The data in POSIX lock records is an unsorted linear array of these
44  * records.  It is unnecessary to store the count as tdb provides the
45  * size of the record.
46  */
47
48 struct posix_lock {
49         int fd;
50         SMB_OFF_T start;
51         SMB_OFF_T size;
52         int lock_type;
53 };
54
/*
 * The data in POSIX pending close records is an unsorted linear array of
 * int file descriptors.  It is unnecessary to store the count as tdb
 * provides the size of the record.
 */
60
61 /* The key used in both the POSIX databases. */
62
63 struct posix_lock_key {
64         SMB_DEV_T device;
65         SMB_INO_T inode;
66 }; 
67
68 /*******************************************************************
69  Form a static locking key for a dev/inode pair.
70 ******************************************************************/
71
72 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
73 {
74         static struct posix_lock_key key;
75         TDB_DATA kbuf;
76         key.dev = dev;
77         key.inode = inode;
78         kbuf.dptr = (char *)&key;
79         kbuf.dsize = sizeof(key);
80         return kbuf;
81 }
82
83 /*******************************************************************
84  Convenience function to get a key from an fsp.
85 ******************************************************************/
86
87 static TDB_DATA locking_key_fsp(files_struct *fsp)
88 {
89         return locking_key(fsp->dev, fsp->inode);
90 }
91
92 /****************************************************************************
93  Add an fd to the pending close tdb.
94 ****************************************************************************/
95
96 static BOOL add_fd_to_close_entry(files_struct *fsp)
97 {
98         struct posix_lock_key = locking_key_fsp(fsp);
99         TDB_DATA kbuf, dbuf;
100         size_t count = 0;
101         int *fd_array = NULL;
102
103         dbuf.dptr = NULL;
104
105         tdb_lockchain(posix_pending_close_tdb, kbuf);
106         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
107
108         dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
109         if (!dbuf.dptr) {
110                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
111                 tdb_unlockchain(posix_pending_close_tdb, kbuf);
112                 return False;
113         }
114         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
115         dbuf.dsize += sizeof(int);
116
117         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
118                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
119         }
120
121         free(dbuf.dptr);
122         tdb_unlockchain(posix_pending_close_tdb, kbuf);
123         return True;
124 }
125
126 /****************************************************************************
127  Remove all fd entries for a specific dev/inode pair from the tdb.
128 ****************************************************************************/
129
130 static void delete_close_entries(files_struct *fsp)
131 {
132     struct posix_lock_key = locking_key_fsp(fsp);
133     TDB_DATA kbuf, dbuf;
134
135         tdb_lockchain(posix_pending_close_tdb, kbuf);
136         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
137                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
138         tdb_unlockchain(posix_pending_close_tdb, kbuf);
139 }
140
141 /****************************************************************************
142  Get the array of POSIX pending close records for an open fsp. Caller must
143  free. Returns number of entries.
144 ****************************************************************************/
145
146 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
147 {
148         struct posix_lock_key = locking_key_fsp(fsp);
149         TDB_DATA kbuf, dbuf;
150         size_t count = 0;
151
152         *entries = NULL;
153         dbuf.dptr = NULL;
154
155         tdb_lockchain(posix_pending_close_tdb, kbuf);
156         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
157
158     if (!dbuf.dptr) {
159                 tdb_unlockchain(posix_pending_close_tdb, kbuf);
160                 return 0;
161         }
162
163         *entries = (int *)dbuf.dptr;
164         count = (size_t)(dbuf.dsize / sizeof(int));
165
166         tdb_unlockchain(posix_pending_close_tdb, kbuf);
167
168         return count;
169 }
170
171 /****************************************************************************
172  Get the array of POSIX locks for an fsp. Caller must free. Returns
173  number of entries.
174 ****************************************************************************/
175
176 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
177 {
178         struct posix_lock_key = locking_key_fsp(fsp);
179         TDB_DATA kbuf, dbuf;
180         size_t count = 0;
181
182         *entries = NULL;
183
184         dbuf.dptr = NULL;
185
186         tdb_lockchain(posix_lock_tdb, kbuf);
187         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
188
189     if (!dbuf.dptr) {
190                 tdb_unlockchain(posix_lock_tdb, kbuf);
191                 return 0;
192         }
193
194         *entries = (struct posix_lock_struct *)dbuf.dptr;
195         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock_struct));
196
197     tdb_unlockchain(posix_lock_tdb, kbuf);
198
199         return count;
200 }
201
202 /****************************************************************************
203  Deal with pending closes needed by POSIX locking support.
204 ****************************************************************************/
205
206 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
207 {
208         int saved_errno = 0;
209         int ret;
210         size_t count, i;
211         struct posix_lock *entries = NULL;
212         int *fd_array = NULL;
213
214         if (!lp_posix_locking(SNUM(conn))) {
215                 /*
216                  * No POSIX to worry about, just close.
217                  */
218                 ret = conn->vfs_ops.close(fsp->fd);
219                 fsp->fd = -1;
220                 return ret;
221         }
222
223         /*
224          * Get the number of outstanding POSIX locks on this dev/inode pair.
225          */
226
227         count = get_posix_lock_entries(fsp, &entries);
228         
229         if (count) {
230
231                 /*
232                  * There are outstanding locks on this dev/inode pair on other fds.
233                  * Add our fd to the pending close tdb and set fsp->fd to -1.
234                  */
235
236                 if (!add_fd_to_close_entry(fsp)) {
237                         free((char *)entries);
238                         return False;
239                 }
240
241                 free((char *)entries);
242                 fsp->fd = -1;
243                 return 0;
244         }
245
246         if(entries)
247                 free((char *)entries);
248
249         /*
250          * No outstanding POSIX locks. Get the pending close fd's
251          * from the tdb and close them all.
252          */
253
254         count = get_posix_pending_close_entries(fsp, &fd_array)
255
256         if (count) {
257                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
258
259                 for(i = 0; i < count; i++) {
260                         if (conn->vfs_ops.close(fd_array[i]) == -1) {
261                                 saved_errno = errno;
262                         }
263                 }
264
265                 if (fd_array)
266                         free((char *)fd_array);
267
268                 /*
269                  * Delete all fd's stored in the tdb
270                  * for this dev/inode pair.
271                  */
272
273                 delete_close_entries(fsp);
274         }
275
276         if (fd_array)
277                 free((char *)fd_array);
278
279         /*
280          * Finally close the fd associated with this fsp.
281          */
282
283         ret = conn->vfs_ops.close(fsp->fd);
284
285         if (saved_errno != 0) {
286         errno = saved_errno;
287                 ret = -1;
288     } 
289
290         fsp->fd = -1;
291
292         return ret;
293 }
294
/****************************************************************************
 Debugging aid: printable name for a POSIX lock type. F_RDLCK is reported
 as "READ", every other value as "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}

	return "WRITE";
}
303
304 /****************************************************************************
305  Add an entry into the POSIX locking tdb.
306 ****************************************************************************/
307
308 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type)
309 {
310         struct posix_lock_key = locking_key_fsp(fsp);
311         TDB_DATA kbuf, dbuf;
312         struct posix_lock pl;
313
314         /*
315          * Now setup the new record.
316          */
317
318         pl.fd = fsp->fd;
319         pl.start = start;
320         pl.size = size;
321         pl.lock_type = lock_type;
322
323         dbuf.dptr = NULL;
324
325         tdb_lockchain(posix_lock_tdb, kbuf);
326         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
327
328         dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(*pl));
329         if (!dbuf.dptr) {
330                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
331                 goto fail;
332         }
333
334         memcpy(dbuf.dptr + dbuf.dsize, rec, sizeof(*pl));
335         dbuf.dsize += sizeof(*pl);
336
337         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
338                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
339                 goto fail;
340         }
341
342     free(dbuf.dptr);
343     tdb_unlockchain(posix_lock_tdb, kbuf);
344
345         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f:dev=%.0f inode=%.0f\n",
346                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
347                         (double)fsp->dev, (double)fsp->inode ));
348
349     return True;
350
351  fail:
352     if (dbuf.dptr)
353                 free(dbuf.dptr);
354     tdb_unlockchain(tdb, kbuf);
355     return False;
356 }
357
358 /****************************************************************************
359  Delete an entry from the POSIX locking tdb.
360 ****************************************************************************/
361
362 static BOOL delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size)
363 {
364         struct posix_lock_key = locking_key_fsp(fsp);
365         TDB_DATA kbuf, dbuf;
366         struct posix_lock *locks;
367         size_t i, count;
368
369         dbuf.dptr = NULL;
370
371         tdb_lockchain(posix_lock_tdb, kbuf);
372         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
373
374         if (!dbuf.dptr) {
375                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
376                 goto fail;
377         }
378
379         /* There are existing locks - find a match. */
380         locks = (struct lock_struct *)dbuf.dptr;
381         count = (size_t(dbuf.dsize / sizeof(*locks));
382
383         for (i=0; i<count; i++) { 
384                 struct posix_lock *pl = &locks[i];
385
386                 if (pl->fd == fd &&
387                         pl->start == start &&
388                         pl->size == size) {
389                         /* Found it - delete it. */
390                         if (count == 1) {
391                                 tdb_delete(posix_lock_tdb, kbuf);
392                         } else {
393                                 if (i < count-1) {
394                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
395                                 }
396                                 dbuf.dsize -= sizeof(*locks);
397                                 tdb_store(tdb, kbuf, dbuf, TDB_REPLACE);
398                         }
399
400                         free(dbuf.dptr);
401                         tdb_unlockchain(tdb, kbuf);
402                         return True;
403                 }
404         }
405
406         /* We didn't find it. */
407
408  fail:
409     if (dbuf.dptr)
410                 free(dbuf.dptr);
411     tdb_unlockchain(tdb, kbuf);
412     return False;
413 }
414
415 /****************************************************************************
416  Utility function to map a lock type correctly depending on the open
417  mode of a file.
418 ****************************************************************************/
419
420 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
421 {
422         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
423                 /*
424                  * Many UNIX's cannot get a write lock on a file opened read-only.
425                  * Win32 locking semantics allow this.
426                  * Do the best we can and attempt a read-only lock.
427                  */
428                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
429                 return F_RDLCK;
430         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
431                 /*
432                  * Ditto for read locks on write only files.
433                  */
434                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
435                 return F_WRLCK;
436         }
437
438   /*
439    * This return should be the most normal, as we attempt
440    * to always open files read/write.
441    */
442
443   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
444 }
445
446 /****************************************************************************
447  Check to see if the given unsigned lock range is within the possible POSIX
448  range. Modifies the given args to be in range if possible, just returns
449  False if not.
450 ****************************************************************************/
451
452 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
453                                                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
454 {
455         SMB_OFF_T offset;
456         SMB_OFF_T count;
457
458 #if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
459
460     SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
461     SMB_OFF_T mask = (mask2<<1);
462     SMB_OFF_T neg_mask = ~mask;
463
464         /*
465          * In this case SMB_OFF_T is 64 bits,
466          * and the underlying system can handle 64 bit signed locks.
467          * Cast to signed type.
468          */
469
470         offset = (SMB_OFF_T)u_offset;
471         count = (SMB_OFF_T)u_count;
472
473         /*
474          * Deal with a very common case of count of all ones.
475          * (lock entire file).
476          */
477
478         if(count == (SMB_OFF_T)-1)
479                 count &= ~mask;
480
481         /*
482          * POSIX lock ranges cannot be negative.
483          * Fail if any combination becomes negative.
484          */
485
486         if(offset < 0 || count < 0 || (offset + count < 0)) {
487                 DEBUG(10,("posix_lock_in_range: negative range: offset = %.0f, count = %.0f. Ignoring lock.\n",
488                                 (double)offset, (double)count ));
489                 return False;
490         }
491
492         /*
493          * In this case SMB_OFF_T is 64 bits, the offset and count
494          * fit within the positive range, and the underlying
495          * system can handle 64 bit locks. Just return as the
496          * cast values are ok.
497          */
498
499 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
500
501         /*
502          * In this case either SMB_OFF_T is 32 bits,
503          * or the underlying system cannot handle 64 bit signed locks.
504          * Either way we have to try and mangle to fit within 31 bits.
505          * This is difficult.
506          */
507
508 #if defined(HAVE_BROKEN_FCNTL64_LOCKS)
509
510         /*
511          * SMB_OFF_T is 64 bits, but we need to use 31 bits due to
512          * broken large locking.
513          */
514
515         /*
516          * Deal with a very common case of count of all ones.
517          * (lock entire file).
518          */
519
520         if(u_count == (SMB_BIG_UINT)-1)
521                 count = 0x7FFFFFFF;
522
523         if(((u_offset >> 32) & 0xFFFFFFFF) || ((u_count >> 32) & 0xFFFFFFFF)) {
524                 DEBUG(10,("posix_lock_in_range: top 32 bits not zero. offset = %.0f, count = %.0f. Ignoring lock.\n",
525                                 (double)u_offset, (double)u_count ));
526                 /* Top 32 bits of offset or count were not zero. */
527                 return False;
528         }
529
530         /* Cast from 64 bits unsigned to 64 bits signed. */
531         offset = (SMB_OFF_T)u_offset;
532         count = (SMB_OFF_T)u_count;
533
534         /*
535          * Check if we are within the 2^31 range.
536          */
537
538         {
539                 int32 low_offset = (int32)offset;
540                 int32 low_count = (int32)count;
541
542                 if(low_offset < 0 || low_count < 0 || (low_offset + low_count < 0)) {
543                         DEBUG(10,("posix_lock_in_range: not within 2^31 range. low_offset = %d, low_count = %d. Ignoring lock.\n",
544                                         low_offset, low_count ));
545                         return False;
546                 }
547         }
548
549         /*
550          * Ok - we can map from a 64 bit number to a 31 bit lock.
551          */
552
553 #else /* HAVE_BROKEN_FCNTL64_LOCKS */
554
555         /*
556          * SMB_OFF_T is 32 bits.
557          */
558
559 #if defined(HAVE_LONGLONG)
560
561         /*
562          * SMB_BIG_UINT is 64 bits, we can do a 32 bit shift.
563          */
564
565         /*
566          * Deal with a very common case of count of all ones.
567          * (lock entire file).
568          */
569
570         if(u_count == (SMB_BIG_UINT)-1)
571                 count = 0x7FFFFFFF;
572
573         if(((u_offset >> 32) & 0xFFFFFFFF) || ((u_count >> 32) & 0xFFFFFFFF)) {
574                 DEBUG(10,("posix_lock_in_range: top 32 bits not zero. u_offset = %.0f, u_count = %.0f. Ignoring lock.\n",
575                                 (double)u_offset, (double)u_count ));
576                 return False;
577         }
578
579         /* Cast from 64 bits unsigned to 32 bits signed. */
580         offset = (SMB_OFF_T)u_offset;
581         count = (SMB_OFF_T)u_count;
582
583         /*
584          * Check if we are within the 2^31 range.
585          */
586
587         if(offset < 0 || count < 0 || (offset + count < 0)) {
588                 DEBUG(10,("posix_lock_in_range: not within 2^31 range. offset = %d, count = %d. Ignoring lock.\n",
589                                 (int)offset, (int)count ));
590                 return False;
591         }
592
593 #else /* HAVE_LONGLONG */
594
595         /*
596          * SMB_BIG_UINT and SMB_OFF_T are both 32 bits,
597          * just cast.
598          */
599
600         /*
601          * Deal with a very common case of count of all ones.
602          * (lock entire file).
603          */
604
605         if(u_count == (SMB_BIG_UINT)-1)
606                 count = 0x7FFFFFFF;
607
608         /* Cast from 32 bits unsigned to 32 bits signed. */
609         offset = (SMB_OFF_T)u_offset;
610         count = (SMB_OFF_T)u_count;
611
612         /*
613          * Check if we are within the 2^31 range.
614          */
615
616         if(offset < 0 || count < 0 || (offset + count < 0)) {
617                 DEBUG(10,("posix_lock_in_range: not within 2^31 range. offset = %d, count = %d. Ignoring lock.\n",
618                                 (int)offset, (int)count ));
619                 return False;
620         }
621
622 #endif /* HAVE_LONGLONG */
623 #endif /* LARGE_SMB_OFF_T */
624 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
625
626         /*
627          * The mapping was successful.
628          */
629
630         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
631                         (double)offset, (double)count ));
632
633         *offset_out = offset;
634         *count_out = count;
635         
636         return True;
637 }
638
639 /****************************************************************************
640  POSIX function to see if a file region is locked. Returns True if the
641  region is locked, False otherwise.
642 ****************************************************************************/
643
644 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
645 {
646         SMB_OFF_T offset;
647         SMB_OFF_T count;
648         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
649
650         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
651                         fsp->fsp_name, (double)u_offset, (double)u_count, lock_type_name(lock_type) ));
652
653         /*
654          * If the requested lock won't fit in the POSIX range, we will
655          * never set it, so presume it is not locked.
656          */
657
658         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
659                 return False;
660
661         /*
662          * Note that most UNIX's can *test* for a write lock on
663          * a read-only fd, just not *set* a write lock on a read-only
664          * fd. So we don't need to use map_lock_type here.
665          */ 
666
667         return fcntl_lock(fsp->fd,SMB_F_GETLK,offset,count,posix_lock_type);
668 }
669
670 /****************************************************************************
671  POSIX function to acquire a lock. Returns True if the
672  lock could be granted, False if not.
673 ****************************************************************************/
674
675 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
676 {
677         SMB_OFF_T offset;
678         SMB_OFF_T count;
679         BOOL ret = True;
680         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
681
682         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
683                         fsp->fsp_name, (double)u_offset, (double)u_count, lock_type_name(lock_type) ));
684
685         /*
686          * If the requested lock won't fit in the POSIX range, we will
687          * pretend it was successful.
688          */
689
690         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
691                 return True;
692
693         /*
694          * Note that setting multiple overlapping read locks on different
695          * file descriptors will not be held separately by the kernel (POSIX
696          * braindamage), but will be merged into one continuous read lock
697          * range. We cope with this case in the release_posix_lock code
698          * below. JRA.
699          */
700
701     ret = fcntl_lock(fsp->fd,SMB_F_SETLK,offset,count,posix_lock_type);
702
703         if (ret)
704                 add_posix_lock_entry(fsp,offset,count,posix_lock_type);
705
706         return ret;
707 }
708
709 /*
710  * Structure used when splitting a lock range
711  * into a POSIX lock range. Doubly linked list.
712  */
713
714 struct unlock_list {
715     struct unlock_list *next;
716     struct unlock_list *prev;
717     SMB_OFF_T start;
718     SMB_OFF_T size;
719         int fd;
720 };
721
722 /****************************************************************************
723  Create a list of lock ranges that don't overlap a given range. Used in calculating
724  POSIX lock unlocks. This is a difficult function that requires ASCII art to
725  understand it :-).
726 ****************************************************************************/
727
728 static struct unlock_list *posix_unlock_list(TALLOC_CTX *ctx, struct unlock_list *ulhead, files_struct *fsp)
729 {
730         struct lock_key key;
731         TDB_DATA kbuf, dbuf;
732         struct lock_struct *locks;
733         int num_locks, i;
734
735         /*
736          * Setup the key for this fetch.
737          */
738         key.device = dev;
739         key.inode = ino;
740         kbuf.dptr = (char *)&key;
741         kbuf.dsize = sizeof(key);
742
743         dbuf.dptr = NULL;
744
745         tdb_lockchain(tdb, kbuf);
746         dbuf = tdb_fetch(tdb, kbuf);
747
748         if (!dbuf.dptr) {
749                 tdb_unlockchain(tdb, kbuf);
750                 return ulhead;
751         }
752         
753         locks = (struct lock_struct *)dbuf.dptr;
754         num_locks = dbuf.dsize / sizeof(*locks);
755
756         /*
757          * Check the current lock list on this dev/inode pair.
758          * Quit if the list is deleted.
759          */
760
761         DEBUG(10,("brl_unlock_list: curr: start=%.0f,size=%.0f\n",
762                 (double)ulhead->start, (double)ulhead->size ));
763
764         for (i=0; i<num_locks && ulhead; i++) {
765
766                 struct lock_struct *lock = &locks[i];
767                 struct unlock_list *ul_curr;
768
769                 /* If it's not this process, ignore it. */
770                 if (lock->context.pid != pid)
771                         continue;
772
773                 /*
774                  * Walk the unlock list, checking for overlaps. Note that
775                  * the unlock list can expand within this loop if the current
776                  * range being examined needs to be split.
777                  */
778
779                 for (ul_curr = ulhead; ul_curr;) {
780
781                         DEBUG(10,("brl_unlock_list: lock: start=%.0f,size=%.0f:",
782                                 (double)lock->start, (double)lock->size ));
783
784                         if ( (ul_curr->start >= (lock->start + lock->size)) ||
785                                  (lock->start > (ul_curr->start + ul_curr->size))) {
786
787                                 /* No overlap with this lock - leave this range alone. */
788 /*********************************************
789                                              +---------+
790                                              | ul_curr |
791                                              +---------+
792                                 +-------+
793                                 | lock  |
794                                 +-------+
795 OR....
796              +---------+
797              | ul_curr |
798              +---------+
799 **********************************************/
800
801                                 DEBUG(10,("no overlap case.\n" ));
802
803                                 ul_curr = ul_curr->next;
804
805                         } else if ( (ul_curr->start >= lock->start) &&
806                                                 (ul_curr->start + ul_curr->size <= lock->start + lock->size) ) {
807
808                                 /*
809                                  * This unlock is completely overlapped by this existing lock range
810                                  * and thus should have no effect (not be unlocked). Delete it from the list.
811                                  */
812 /*********************************************
813                 +---------+
814                 | ul_curr |
815                 +---------+
816         +---------------------------+
817         |       lock                |
818         +---------------------------+
819 **********************************************/
820                                 /* Save the next pointer */
821                                 struct unlock_list *ul_next = ul_curr->next;
822
823                                 DEBUG(10,("delete case.\n" ));
824
825                                 DLIST_REMOVE(ulhead, ul_curr);
826                                 if(ulhead == NULL)
827                                         break; /* No more list... */
828
829                                 ul_curr = ul_next;
830                                 
831                         } else if ( (ul_curr->start >= lock->start) &&
832                                                 (ul_curr->start < lock->start + lock->size) &&
833                                                 (ul_curr->start + ul_curr->size > lock->start + lock->size) ) {
834
835                                 /*
836                                  * This unlock overlaps the existing lock range at the high end.
837                                  * Truncate by moving start to existing range end and reducing size.
838                                  */
839 /*********************************************
840                 +---------------+
841                 | ul_curr       |
842                 +---------------+
843         +---------------+
844         |    lock       |
845         +---------------+
846 BECOMES....
847                         +-------+
848                         |ul_curr|
849                         +-------+
850 **********************************************/
851
852                                 ul_curr->size = (ul_curr->start + ul_curr->size) - (lock->start + lock->size);
853                                 ul_curr->start = lock->start + lock->size;
854
855                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
856                                                                 (double)ul_curr->start, (double)ul_curr->size ));
857
858                                 ul_curr = ul_curr->next;
859
860                         } else if ( (ul_curr->start < lock->start) &&
861                                                 (ul_curr->start + ul_curr->size > lock->start) ) {
862
863                                 /*
864                                  * This unlock overlaps the existing lock range at the low end.
865                                  * Truncate by reducing size.
866                                  */
867 /*********************************************
868    +---------------+
869    | ul_curr       |
870    +---------------+
871            +---------------+
872            |    lock       |
873            +---------------+
874 BECOMES....
875    +-------+
876    |ul_curr|
877    +-------+
878 **********************************************/
879
880                                 ul_curr->size = lock->start - ul_curr->start;
881
882                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
883                                                                 (double)ul_curr->start, (double)ul_curr->size ));
884
885                                 ul_curr = ul_curr->next;
886                 
887                         } else if ( (ul_curr->start < lock->start) &&
888                                                 (ul_curr->start + ul_curr->size > lock->start + lock->size) ) {
889                                 /*
890                                  * Worst case scenario. Unlock request completely overlaps an existing
891                                  * lock range. Split the request into two, push the new (upper) request
892                                  * into the dlink list, and continue with the entry after ul_new (as we
893                                  * know that ul_new will not overlap with this lock).
894                                  */
895 /*********************************************
896         +---------------------------+
897         |       ul_curr             |
898         +---------------------------+
899                 +---------+
900                 | lock    |
901                 +---------+
902 BECOMES.....
903         +-------+         +---------+
904         |ul_curr|         |ul_new   |
905         +-------+         +---------+
906 **********************************************/
907                                 struct unlock_list *ul_new = (struct unlock_list *)talloc(ctx,
908                                                                                                         sizeof(struct unlock_list));
909
910                                 if(ul_new == NULL) {
911                                         DEBUG(0,("brl_unlock_list: talloc fail.\n"));
912                                         return NULL; /* The talloc_destroy takes care of cleanup. */
913                                 }
914
915                                 ZERO_STRUCTP(ul_new);
916                                 ul_new->start = lock->start + lock->size;
917                                 ul_new->size = ul_curr->start + ul_curr->size - ul_new->start;
918                                 ul_new->smbpid = ul_curr->smbpid;
919
920                                 /* Add into the dlink list after the ul_curr point - NOT at ulhead. */
921                                 DLIST_ADD(ul_curr, ul_new);
922
923                                 /* Truncate the ul_curr. */
924                                 ul_curr->size = lock->start - ul_curr->start;
925
926                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
927 new: start=%.0f,size=%.0f\n", (double)ul_curr->start, (double)ul_curr->size,
928                                                                 (double)ul_new->start, (double)ul_new->size ));
929
930                                 ul_curr = ul_new->next;
931
932                         } else {
933
934                                 /*
935                                  * This logic case should never happen. Ensure this is the
936                                  * case by forcing an abort.... Remove in production.
937                                  */
938
939                                 smb_panic("logic flaw in cases...\n");
940                         }
941                 } /* end for ( ul_curr = ulhead; ul_curr;) */
942         } /* end for (i=0; i<num_locks && ul_head; i++) */
943
944         tdb_unlockchain(tdb, kbuf);
945
946         if (dbuf.dptr)
947                 free(dbuf.dptr);
948         
949         return ulhead;
950 }
951
952 /****************************************************************************
953  POSIX function to release a lock given a list. Returns True if the
954  lock could be released, False if not.
955 ****************************************************************************/
956
957 static BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
958 {
959         SMB_OFF_T offset;
960         SMB_OFF_T count;
961         BOOL ret = True;
962         TALLOC_CTX *ul_ctx = NULL;
963         struct unlock_list *ulist = NULL;
964         struct unlock_list *ul = NULL;
965
966         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
967                 fsp->fsp_name, (double)offset, (double)count ));
968
969         /*
970          * If the requested lock won't fit in the POSIX range, we will
971          * pretend it was successful.
972          */
973
974         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
975                 return True;
976
977         if ((ul_ctx = talloc_init()) == NULL) {
978         DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
979                 return True; /* Not a fatal error. */
980         }
981
982         if ((ul = (struct unlock_list *)talloc(ul_ctx, sizeof(struct unlock_list))) == NULL) {
983                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
984                 talloc_destroy(ul_ctx);
985                 return True; /* Not a fatal error. */
986         }
987
988         /*
989          * Create the initial list entry containing the
990          * lock we want to remove.
991          */
992
993         ZERO_STRUCTP(ul);
994         ul->start = offset;
995         ul->size = count;
996         ul->fd = fsp->fd;
997
998         DLIST_ADD(ulist, ul);
999
1000         /*
1001          * The following call calculates if there are any
1002          * overlapping read locks held by this process on
1003          * other fd's open on the same file and creates a
1004          * list of unlock ranges that will allow other
1005          * POSIX lock ranges to remain on the file whilst the
1006          * unlocks are performed.
1007          */
1008
1009         ulist = posix_unlock_list(ul_ctx, ulist, fsp);
1010
1011         /*
1012          * Release the POSIX locks on the list of ranges returned.
1013          */
1014
1015         for(; ulist; ulist = ulist->next) {
1016                 SMB_OFF_T offset = ulist->start;
1017                 SMB_OFF_T count = ulist->size;
1018
1019                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1020                         (double)offset, (double)count ));
1021
1022                 if(u_count == 0) {
1023
1024                         /*
1025                          * This lock must overlap with an existing read-only lock
1026                          * held by another fd. Don't do any POSIX call.
1027                          */
1028
1029                         continue;
1030                 }
1031
1032                 /*
1033                  * If the requested lock won't fit in the POSIX range, we will
1034                  * pretend it was successful.
1035                  */
1036
1037                 if(!posix_lock_in_range(&offset, &count, offset, count))
1038                         continue;
1039
1040                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1041                         (double)offset, (double)count ));
1042
1043                 ret = fcntl_lock(fsp->fd,SMB_F_SETLK,offset,count,F_UNLCK);
1044         }
1045
1046     talloc_destroy(ul_ctx);
1047
1048         /*
1049          * We treat this as one unlock request for POSIX accounting purposes even
1050          * if it may have been split into multiple smaller POSIX unlock ranges.
1051          */ 
1052
1053         delete_posix_lock_entry(fsp->
1054
1055         return ret;
1056 }
1057
1058 /****************************************************************************
1059  Return a lock list associated with an open file.
1060 ****************************************************************************/
1061
1062 struct unlock_list *brl_getlocklist( TALLOC_CTX *ctx, SMB_DEV_T dev, SMB_INO_T ino, pid_t pid, int tid, int fnum)
1063 {
1064         struct lock_key key;
1065         TDB_DATA kbuf, dbuf;
1066         int i, count;
1067         struct lock_struct *locks;
1068         struct unlock_list *ulist = NULL;
1069
1070         key.device = dev;
1071         key.inode = ino;
1072         kbuf.dptr = (char *)&key;
1073         kbuf.dsize = sizeof(key);
1074
1075         dbuf.dptr = NULL;
1076
1077         tdb_lockchain(tdb, kbuf);
1078         dbuf = tdb_fetch(tdb, kbuf);
1079
1080         if (!dbuf.dptr) {
1081                 tdb_unlockchain(tdb, kbuf);
1082                 return NULL;
1083         }
1084
1085         /* There are existing locks - allocate an entry for each one. */
1086         locks = (struct lock_struct *)dbuf.dptr;
1087         count = dbuf.dsize / sizeof(*locks);
1088
1089         for (i=0; i<count; i++) {
1090                 struct lock_struct *lock = &locks[i];
1091
1092                 if (lock->context.tid == tid &&
1093                     lock->context.pid == pid &&
1094                     lock->fnum == fnum) {
1095
1096                                 struct unlock_list *ul_new = (struct unlock_list *)talloc(ctx,
1097                                                                                                         sizeof(struct unlock_list));
1098
1099                                 if(ul_new == NULL) {
1100                                         DEBUG(0,("brl_getlocklist: talloc fail.\n"));
1101                                         return NULL; /* The talloc_destroy takes care of cleanup. */
1102                                 }
1103
1104                                 ZERO_STRUCTP(ul_new);
1105                                 ul_new->start = lock->start;
1106                                 ul_new->size = lock->size;
1107                                 ul_new->smbpid = lock->context.smbpid;
1108
1109                                 DLIST_ADD(ulist, ul_new);
1110                 }
1111         }
1112
1113         if (dbuf.dptr)
1114                 free(dbuf.dptr);
1115         tdb_unlockchain(tdb, kbuf);
1116
1117         return ulist;
1118 }
1119
1120 /****************************************************************************
1121  Remove any locks on this fd. Called from file_close().
1122 ****************************************************************************/
1123
1124 void posix_locking_close_file(files_struct *fsp)
1125 {
1126         TALLOC_CTX *ul_ctx = NULL;
1127         struct unlock_list *ul = NULL;
1128         int eclass;
1129         uint32 ecode;
1130         struct pending_closes *pc;
1131
1132                 /*
1133                  * Optimization for the common case where we are the only
1134                  * opener of a file. If all fd entries are our own, we don't
1135                  * need to explicitly release all the locks via the POSIX functions,
1136                  * we can just release all the brl locks, as in the no POSIX locking case.
1137                  */
1138
1139                 if ((pc = find_pending_close_entry(fsp->dev, fsp->inode)) != NULL) {
1140
1141                         if (pc->fd_array_size == 1 && pc->fd_array[0] == fsp->fd ) {
1142                                 /*
1143                                  * Just release all the brl locks, no need to release individually.
1144                                  */
1145
1146                                 brl_close(fsp->dev, fsp->inode, pid, fsp->conn->cnum, fsp->fnum);
1147                                 return;
1148                         }
1149                 }
1150
1151                 if ((ul_ctx = talloc_init()) == NULL) {
1152                         DEBUG(0,("locking_close_file: unable to init talloc context.\n"));
1153                         return;
1154                 }
1155
1156                 /*
1157                  * We need to release all POSIX locks we have on this
1158                  * fd. Get all our existing locks from the tdb locking database.
1159                  */
1160
1161                 ul = brl_getlocklist(ul_ctx, fsp->dev, fsp->inode, pid, fsp->conn->cnum, fsp->fnum);
1162
1163                 /*
1164                  * Now unlock all of them. This will remove the brl entry also
1165                  * for each lock. Note we need to make sure the global_smbpid matches
1166                  * the one associated with each lock in case the client plays games
1167                  * with smbpids (like smbtorture does :-).
1168                  */
1169
1170                 for(; ul; ul = ul->next) {
1171                         global_smbpid = ul->smbpid;
1172                         do_unlock(fsp,fsp->conn,ul->size,ul->start,&eclass,&ecode);
1173                 }
1174         
1175                 talloc_destroy(ul_ctx);
1176
1177         } else {
1178
1179                 /*
1180                  * Just release all the brl locks, no need to release individually.
1181                  */
1182
1183                 brl_close(fsp->dev, fsp->inode, pid, fsp->conn->cnum, fsp->fnum);
1184         }
1185 }
1186
1187 /*******************************************************************
1188  Create the in-memory POSIX lock databases.
1189 ********************************************************************/
1190
1191 void posix_lock_init(void)
1192 {
1193         if (posix_lock_tdb && posix_pending_close_tdb)
1194                 return;
1195
1196         if (!posix_lock_tdb)
1197                 posix_lock_tdb = tdb_open(NULL, 0, TDB_CLEAR_IF_FIRST,
1198                     O_RDWR|O_CREAT, 0644);
1199     if (!posix_lock_tdb) {
1200         DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1201     }
1202         if (!posix_pending_close_tdb)
1203                 posix_pending_close_tdb = tdb_open(NULL, 0, TDB_CLEAR_IF_FIRST,
1204                     O_RDWR|O_CREAT, 0644);
1205     if (!posix_pending_close_tdb) {
1206         DEBUG(0,("Failed to open POSIX pending close database.\n"));
1207     }
1208 }