Ok - this is the *third* implementation of this (third time's the charm :-).
[ira/wip.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/Netbios implementation.
3    Version 3.0
4    Locking functions
5    Copyright (C) Jeremy Allison 1992-2000
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
21    Revision History:
22
23    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 */
25
26 #include "includes.h"
27 extern int DEBUGLEVEL;
28 extern int global_smbpid;
29
30 /*
31  * The POSIX locking database handle.
32  */
33
34 static TDB_CONTEXT *posix_lock_tdb;
35
36 /*
37  * The pending close database handle.
38  */
39
40 static TDB_CONTEXT *posix_pending_close_tdb;
41
42 /*
43  * The data in POSIX lock records is an unsorted linear array of these
44  * records.  It is unnecessary to store the count as tdb provides the
45  * size of the record.
46  */
47
48 struct posix_lock {
49         int fd;
50         SMB_OFF_T start;
51         SMB_OFF_T size;
52         int lock_type;
53 };
54
55 /*
56  * The data in POSIX pending close records is an unsorted linear array of int
57  * records.  It is unnecessary to store the count as tdb provides the
58  * size of the record.
59  */
60
61 /* The key used in both the POSIX databases. */
62
63 struct posix_lock_key {
64         SMB_DEV_T device;
65         SMB_INO_T inode;
66 }; 
67
68 /*******************************************************************
69  Form a static locking key for a dev/inode pair.
70 ******************************************************************/
71
72 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
73 {
74         static struct posix_lock_key key;
75         TDB_DATA kbuf;
76         key.device = dev;
77         key.inode = inode;
78         kbuf.dptr = (char *)&key;
79         kbuf.dsize = sizeof(key);
80         return kbuf;
81 }
82
83 /*******************************************************************
84  Convenience function to get a key from an fsp.
85 ******************************************************************/
86
87 static TDB_DATA locking_key_fsp(files_struct *fsp)
88 {
89         return locking_key(fsp->dev, fsp->inode);
90 }
91
92 /****************************************************************************
93  Add an fd to the pending close tdb.
94 ****************************************************************************/
95
96 static BOOL add_fd_to_close_entry(files_struct *fsp)
97 {
98         TDB_DATA kbuf = locking_key_fsp(fsp);
99         TDB_DATA dbuf;
100
101         dbuf.dptr = NULL;
102
103         tdb_lockchain(posix_pending_close_tdb, kbuf);
104         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
105
106         dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
107         if (!dbuf.dptr) {
108                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
109                 tdb_unlockchain(posix_pending_close_tdb, kbuf);
110                 return False;
111         }
112         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
113         dbuf.dsize += sizeof(int);
114
115         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
116                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
117         }
118
119         free(dbuf.dptr);
120         tdb_unlockchain(posix_pending_close_tdb, kbuf);
121         return True;
122 }
123
124 /****************************************************************************
125  Remove all fd entries for a specific dev/inode pair from the tdb.
126 ****************************************************************************/
127
128 static void delete_close_entries(files_struct *fsp)
129 {
130         TDB_DATA kbuf = locking_key_fsp(fsp);
131
132         tdb_lockchain(posix_pending_close_tdb, kbuf);
133         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
134                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
135         tdb_unlockchain(posix_pending_close_tdb, kbuf);
136 }
137
138 /****************************************************************************
139  Get the array of POSIX pending close records for an open fsp. Caller must
140  free. Returns number of entries.
141 ****************************************************************************/
142
143 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
144 {
145         TDB_DATA kbuf = locking_key_fsp(fsp);
146         TDB_DATA dbuf;
147         size_t count = 0;
148
149         *entries = NULL;
150         dbuf.dptr = NULL;
151
152         tdb_lockchain(posix_pending_close_tdb, kbuf);
153         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
154
155     if (!dbuf.dptr) {
156                 tdb_unlockchain(posix_pending_close_tdb, kbuf);
157                 return 0;
158         }
159
160         *entries = (int *)dbuf.dptr;
161         count = (size_t)(dbuf.dsize / sizeof(int));
162
163         tdb_unlockchain(posix_pending_close_tdb, kbuf);
164
165         return count;
166 }
167
168 /****************************************************************************
169  Get the array of POSIX locks for an fsp. Caller must free. Returns
170  number of entries.
171 ****************************************************************************/
172
173 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
174 {
175         TDB_DATA kbuf = locking_key_fsp(fsp);
176         TDB_DATA dbuf;
177         size_t count = 0;
178
179         *entries = NULL;
180
181         dbuf.dptr = NULL;
182
183         tdb_lockchain(posix_lock_tdb, kbuf);
184         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
185
186     if (!dbuf.dptr) {
187                 tdb_unlockchain(posix_lock_tdb, kbuf);
188                 return 0;
189         }
190
191         *entries = (struct posix_lock *)dbuf.dptr;
192         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
193
194     tdb_unlockchain(posix_lock_tdb, kbuf);
195
196         return count;
197 }
198
199 /****************************************************************************
200  Deal with pending closes needed by POSIX locking support.
201 ****************************************************************************/
202
203 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
204 {
205         int saved_errno = 0;
206         int ret;
207         size_t count, i;
208         struct posix_lock *entries = NULL;
209         int *fd_array = NULL;
210
211         if (!lp_posix_locking(SNUM(conn))) {
212                 /*
213                  * No POSIX to worry about, just close.
214                  */
215                 ret = conn->vfs_ops.close(fsp->fd);
216                 fsp->fd = -1;
217                 return ret;
218         }
219
220         /*
221          * Get the number of outstanding POSIX locks on this dev/inode pair.
222          */
223
224         count = get_posix_lock_entries(fsp, &entries);
225         
226         if (count) {
227
228                 /*
229                  * There are outstanding locks on this dev/inode pair on other fds.
230                  * Add our fd to the pending close tdb and set fsp->fd to -1.
231                  */
232
233                 if (!add_fd_to_close_entry(fsp)) {
234                         free((char *)entries);
235                         return False;
236                 }
237
238                 free((char *)entries);
239                 fsp->fd = -1;
240                 return 0;
241         }
242
243         if(entries)
244                 free((char *)entries);
245
246         /*
247          * No outstanding POSIX locks. Get the pending close fd's
248          * from the tdb and close them all.
249          */
250
251         count = get_posix_pending_close_entries(fsp, &fd_array);
252
253         if (count) {
254                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
255
256                 for(i = 0; i < count; i++) {
257                         if (conn->vfs_ops.close(fd_array[i]) == -1) {
258                                 saved_errno = errno;
259                         }
260                 }
261
262                 if (fd_array)
263                         free((char *)fd_array);
264
265                 /*
266                  * Delete all fd's stored in the tdb
267                  * for this dev/inode pair.
268                  */
269
270                 delete_close_entries(fsp);
271         }
272
273         if (fd_array)
274                 free((char *)fd_array);
275
276         /*
277          * Finally close the fd associated with this fsp.
278          */
279
280         ret = conn->vfs_ops.close(fsp->fd);
281
282         if (saved_errno != 0) {
283         errno = saved_errno;
284                 ret = -1;
285     } 
286
287         fsp->fd = -1;
288
289         return ret;
290 }
291
/****************************************************************************
 Debugging aid: printable name for a lock type. F_RDLCK gives "READ",
 every other value gives "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}

	return "WRITE";
}
300
301 /****************************************************************************
302  Add an entry into the POSIX locking tdb.
303 ****************************************************************************/
304
305 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type)
306 {
307         TDB_DATA kbuf = locking_key_fsp(fsp);
308         TDB_DATA dbuf;
309         struct posix_lock pl;
310
311         /*
312          * Now setup the new record.
313          */
314
315         pl.fd = fsp->fd;
316         pl.start = start;
317         pl.size = size;
318         pl.lock_type = lock_type;
319
320         dbuf.dptr = NULL;
321
322         tdb_lockchain(posix_lock_tdb, kbuf);
323         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
324
325         dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
326         if (!dbuf.dptr) {
327                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
328                 goto fail;
329         }
330
331         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
332         dbuf.dsize += sizeof(pl);
333
334         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
335                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
336                 goto fail;
337         }
338
339     free(dbuf.dptr);
340     tdb_unlockchain(posix_lock_tdb, kbuf);
341
342         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f:dev=%.0f inode=%.0f\n",
343                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
344                         (double)fsp->dev, (double)fsp->inode ));
345
346     return True;
347
348  fail:
349     if (dbuf.dptr)
350                 free(dbuf.dptr);
351     tdb_unlockchain(posix_lock_tdb, kbuf);
352     return False;
353 }
354
355 /****************************************************************************
356  Delete an entry from the POSIX locking tdb.
357 ****************************************************************************/
358
359 static BOOL delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size)
360 {
361         TDB_DATA kbuf = locking_key_fsp(fsp);
362         TDB_DATA dbuf;
363         struct posix_lock *locks;
364         size_t i, count;
365
366         dbuf.dptr = NULL;
367
368         tdb_lockchain(posix_lock_tdb, kbuf);
369         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
370
371         if (!dbuf.dptr) {
372                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
373                 goto fail;
374         }
375
376         /* There are existing locks - find a match. */
377         locks = (struct posix_lock *)dbuf.dptr;
378         count = (size_t)(dbuf.dsize / sizeof(*locks));
379
380         for (i=0; i<count; i++) { 
381                 struct posix_lock *pl = &locks[i];
382
383                 if (pl->fd == fsp->fd &&
384                         pl->start == start &&
385                         pl->size == size) {
386                         /* Found it - delete it. */
387                         if (count == 1) {
388                                 tdb_delete(posix_lock_tdb, kbuf);
389                         } else {
390                                 if (i < count-1) {
391                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
392                                 }
393                                 dbuf.dsize -= sizeof(*locks);
394                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
395                         }
396
397                         free(dbuf.dptr);
398                         tdb_unlockchain(posix_lock_tdb, kbuf);
399                         return True;
400                 }
401         }
402
403         /* We didn't find it. */
404
405  fail:
406     if (dbuf.dptr)
407                 free(dbuf.dptr);
408     tdb_unlockchain(posix_lock_tdb, kbuf);
409     return False;
410 }
411
412 /****************************************************************************
413  Utility function to map a lock type correctly depending on the open
414  mode of a file.
415 ****************************************************************************/
416
417 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
418 {
419         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
420                 /*
421                  * Many UNIX's cannot get a write lock on a file opened read-only.
422                  * Win32 locking semantics allow this.
423                  * Do the best we can and attempt a read-only lock.
424                  */
425                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
426                 return F_RDLCK;
427         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
428                 /*
429                  * Ditto for read locks on write only files.
430                  */
431                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
432                 return F_WRLCK;
433         }
434
435   /*
436    * This return should be the most normal, as we attempt
437    * to always open files read/write.
438    */
439
440   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
441 }
442
443 /****************************************************************************
444  Check to see if the given unsigned lock range is within the possible POSIX
445  range. Modifies the given args to be in range if possible, just returns
446  False if not.
447 ****************************************************************************/
448
449 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
450                                                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
451 {
452         SMB_OFF_T offset;
453         SMB_OFF_T count;
454
455 #if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
456
457     SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
458     SMB_OFF_T mask = (mask2<<1);
459     SMB_OFF_T neg_mask = ~mask;
460
461         /*
462          * In this case SMB_OFF_T is 64 bits,
463          * and the underlying system can handle 64 bit signed locks.
464          * Cast to signed type.
465          */
466
467         offset = (SMB_OFF_T)u_offset;
468         count = (SMB_OFF_T)u_count;
469
470         /*
471          * Deal with a very common case of count of all ones.
472          * (lock entire file).
473          */
474
475         if(count == (SMB_OFF_T)-1)
476                 count &= ~mask;
477
478         /*
479          * POSIX lock ranges cannot be negative.
480          * Fail if any combination becomes negative.
481          */
482
483         if(offset < 0 || count < 0 || (offset + count < 0)) {
484                 DEBUG(10,("posix_lock_in_range: negative range: offset = %.0f, count = %.0f. Ignoring lock.\n",
485                                 (double)offset, (double)count ));
486                 return False;
487         }
488
489         /*
490          * In this case SMB_OFF_T is 64 bits, the offset and count
491          * fit within the positive range, and the underlying
492          * system can handle 64 bit locks. Just return as the
493          * cast values are ok.
494          */
495
496 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
497
498         /*
499          * In this case either SMB_OFF_T is 32 bits,
500          * or the underlying system cannot handle 64 bit signed locks.
501          * Either way we have to try and mangle to fit within 31 bits.
502          * This is difficult.
503          */
504
505 #if defined(HAVE_BROKEN_FCNTL64_LOCKS)
506
507         /*
508          * SMB_OFF_T is 64 bits, but we need to use 31 bits due to
509          * broken large locking.
510          */
511
512         /*
513          * Deal with a very common case of count of all ones.
514          * (lock entire file).
515          */
516
517         if(u_count == (SMB_BIG_UINT)-1)
518                 count = 0x7FFFFFFF;
519
520         if(((u_offset >> 32) & 0xFFFFFFFF) || ((u_count >> 32) & 0xFFFFFFFF)) {
521                 DEBUG(10,("posix_lock_in_range: top 32 bits not zero. offset = %.0f, count = %.0f. Ignoring lock.\n",
522                                 (double)u_offset, (double)u_count ));
523                 /* Top 32 bits of offset or count were not zero. */
524                 return False;
525         }
526
527         /* Cast from 64 bits unsigned to 64 bits signed. */
528         offset = (SMB_OFF_T)u_offset;
529         count = (SMB_OFF_T)u_count;
530
531         /*
532          * Check if we are within the 2^31 range.
533          */
534
535         {
536                 int32 low_offset = (int32)offset;
537                 int32 low_count = (int32)count;
538
539                 if(low_offset < 0 || low_count < 0 || (low_offset + low_count < 0)) {
540                         DEBUG(10,("posix_lock_in_range: not within 2^31 range. low_offset = %d, low_count = %d. Ignoring lock.\n",
541                                         low_offset, low_count ));
542                         return False;
543                 }
544         }
545
546         /*
547          * Ok - we can map from a 64 bit number to a 31 bit lock.
548          */
549
550 #else /* HAVE_BROKEN_FCNTL64_LOCKS */
551
552         /*
553          * SMB_OFF_T is 32 bits.
554          */
555
556 #if defined(HAVE_LONGLONG)
557
558         /*
559          * SMB_BIG_UINT is 64 bits, we can do a 32 bit shift.
560          */
561
562         /*
563          * Deal with a very common case of count of all ones.
564          * (lock entire file).
565          */
566
567         if(u_count == (SMB_BIG_UINT)-1)
568                 count = 0x7FFFFFFF;
569
570         if(((u_offset >> 32) & 0xFFFFFFFF) || ((u_count >> 32) & 0xFFFFFFFF)) {
571                 DEBUG(10,("posix_lock_in_range: top 32 bits not zero. u_offset = %.0f, u_count = %.0f. Ignoring lock.\n",
572                                 (double)u_offset, (double)u_count ));
573                 return False;
574         }
575
576         /* Cast from 64 bits unsigned to 32 bits signed. */
577         offset = (SMB_OFF_T)u_offset;
578         count = (SMB_OFF_T)u_count;
579
580         /*
581          * Check if we are within the 2^31 range.
582          */
583
584         if(offset < 0 || count < 0 || (offset + count < 0)) {
585                 DEBUG(10,("posix_lock_in_range: not within 2^31 range. offset = %d, count = %d. Ignoring lock.\n",
586                                 (int)offset, (int)count ));
587                 return False;
588         }
589
590 #else /* HAVE_LONGLONG */
591
592         /*
593          * SMB_BIG_UINT and SMB_OFF_T are both 32 bits,
594          * just cast.
595          */
596
597         /*
598          * Deal with a very common case of count of all ones.
599          * (lock entire file).
600          */
601
602         if(u_count == (SMB_BIG_UINT)-1)
603                 count = 0x7FFFFFFF;
604
605         /* Cast from 32 bits unsigned to 32 bits signed. */
606         offset = (SMB_OFF_T)u_offset;
607         count = (SMB_OFF_T)u_count;
608
609         /*
610          * Check if we are within the 2^31 range.
611          */
612
613         if(offset < 0 || count < 0 || (offset + count < 0)) {
614                 DEBUG(10,("posix_lock_in_range: not within 2^31 range. offset = %d, count = %d. Ignoring lock.\n",
615                                 (int)offset, (int)count ));
616                 return False;
617         }
618
619 #endif /* HAVE_LONGLONG */
620 #endif /* LARGE_SMB_OFF_T */
621 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
622
623         /*
624          * The mapping was successful.
625          */
626
627         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
628                         (double)offset, (double)count ));
629
630         *offset_out = offset;
631         *count_out = count;
632         
633         return True;
634 }
635
636 /****************************************************************************
637  POSIX function to see if a file region is locked. Returns True if the
638  region is locked, False otherwise.
639 ****************************************************************************/
640
641 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
642 {
643         SMB_OFF_T offset;
644         SMB_OFF_T count;
645         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
646
647         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
648                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
649
650         /*
651          * If the requested lock won't fit in the POSIX range, we will
652          * never set it, so presume it is not locked.
653          */
654
655         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
656                 return False;
657
658         /*
659          * Note that most UNIX's can *test* for a write lock on
660          * a read-only fd, just not *set* a write lock on a read-only
661          * fd. So we don't need to use map_lock_type here.
662          */ 
663
664         return fcntl_lock(fsp->fd,SMB_F_GETLK,offset,count,posix_lock_type);
665 }
666
667 /****************************************************************************
668  POSIX function to acquire a lock. Returns True if the
669  lock could be granted, False if not.
670 ****************************************************************************/
671
672 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
673 {
674         SMB_OFF_T offset;
675         SMB_OFF_T count;
676         BOOL ret = True;
677         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
678
679         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
680                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
681
682         /*
683          * If the requested lock won't fit in the POSIX range, we will
684          * pretend it was successful.
685          */
686
687         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
688                 return True;
689
690         /*
691          * Note that setting multiple overlapping locks on different
692          * file descriptors will not be held separately by the kernel (POSIX
693          * braindamage), but will be merged into one continuous lock
694          * range. We cope with this case in the release_posix_lock code
695          * below. JRA.
696          */
697
698     ret = fcntl_lock(fsp->fd,SMB_F_SETLK,offset,count,posix_lock_type);
699
700         if (ret)
701                 add_posix_lock_entry(fsp,offset,count,posix_lock_type);
702
703         return ret;
704 }
705
706 /*
707  * Structure used when splitting a lock range
708  * into a POSIX lock range. Doubly linked list.
709  */
710
711 struct unlock_list {
712     struct unlock_list *next;
713     struct unlock_list *prev;
714     SMB_OFF_T start;
715     SMB_OFF_T size;
716 };
717
718 /****************************************************************************
719  Create a list of lock ranges that don't overlap a given range. Used in calculating
720  POSIX lock unlocks. This is a difficult function that requires ASCII art to
721  understand it :-).
722 ****************************************************************************/
723
724 static struct unlock_list *posix_unlock_list(TALLOC_CTX *ctx, struct unlock_list *ulhead, files_struct *fsp)
725 {
726         TDB_DATA kbuf = locking_key_fsp(fsp);
727         TDB_DATA dbuf;
728         struct posix_lock *locks;
729         size_t num_locks, i;
730
731         dbuf.dptr = NULL;
732
733         tdb_lockchain(posix_lock_tdb, kbuf);
734         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
735
736         if (!dbuf.dptr) {
737                 tdb_unlockchain(posix_lock_tdb, kbuf);
738                 return ulhead;
739         }
740         
741         locks = (struct posix_lock *)dbuf.dptr;
742         num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
743
744         /*
745          * Check the current lock list on this dev/inode pair.
746          * Quit if the list is deleted.
747          */
748
749         DEBUG(10,("posix_unlock_list: curr: start=%.0f,size=%.0f\n",
750                 (double)ulhead->start, (double)ulhead->size ));
751
752         for (i=0; i<num_locks && ulhead; i++) {
753
754                 struct posix_lock *lock = &locks[i];
755                 struct unlock_list *ul_curr;
756
757                 /*
758                  * Walk the unlock list, checking for overlaps. Note that
759                  * the unlock list can expand within this loop if the current
760                  * range being examined needs to be split.
761                  */
762
763                 for (ul_curr = ulhead; ul_curr;) {
764
765                         DEBUG(10,("posix_unlock_list: lock: start=%.0f,size=%.0f:",
766                                 (double)lock->start, (double)lock->size ));
767
768                         if ( (ul_curr->start >= (lock->start + lock->size)) ||
769                                  (lock->start > (ul_curr->start + ul_curr->size))) {
770
771                                 /* No overlap with this lock - leave this range alone. */
772 /*********************************************
773                                              +---------+
774                                              | ul_curr |
775                                              +---------+
776                                 +-------+
777                                 | lock  |
778                                 +-------+
779 OR....
780              +---------+
781              | ul_curr |
782              +---------+
783 **********************************************/
784
785                                 DEBUG(10,("no overlap case.\n" ));
786
787                                 ul_curr = ul_curr->next;
788
789                         } else if ( (ul_curr->start >= lock->start) &&
790                                                 (ul_curr->start + ul_curr->size <= lock->start + lock->size) ) {
791
792                                 /*
793                                  * This unlock is completely overlapped by this existing lock range
794                                  * and thus should have no effect (not be unlocked). Delete it from the list.
795                                  */
796 /*********************************************
797                 +---------+
798                 | ul_curr |
799                 +---------+
800         +---------------------------+
801         |       lock                |
802         +---------------------------+
803 **********************************************/
804                                 /* Save the next pointer */
805                                 struct unlock_list *ul_next = ul_curr->next;
806
807                                 DEBUG(10,("delete case.\n" ));
808
809                                 DLIST_REMOVE(ulhead, ul_curr);
810                                 if(ulhead == NULL)
811                                         break; /* No more list... */
812
813                                 ul_curr = ul_next;
814                                 
815                         } else if ( (ul_curr->start >= lock->start) &&
816                                                 (ul_curr->start < lock->start + lock->size) &&
817                                                 (ul_curr->start + ul_curr->size > lock->start + lock->size) ) {
818
819                                 /*
820                                  * This unlock overlaps the existing lock range at the high end.
821                                  * Truncate by moving start to existing range end and reducing size.
822                                  */
823 /*********************************************
824                 +---------------+
825                 | ul_curr       |
826                 +---------------+
827         +---------------+
828         |    lock       |
829         +---------------+
830 BECOMES....
831                         +-------+
832                         |ul_curr|
833                         +-------+
834 **********************************************/
835
836                                 ul_curr->size = (ul_curr->start + ul_curr->size) - (lock->start + lock->size);
837                                 ul_curr->start = lock->start + lock->size;
838
839                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
840                                                                 (double)ul_curr->start, (double)ul_curr->size ));
841
842                                 ul_curr = ul_curr->next;
843
844                         } else if ( (ul_curr->start < lock->start) &&
845                                                 (ul_curr->start + ul_curr->size > lock->start) ) {
846
847                                 /*
848                                  * This unlock overlaps the existing lock range at the low end.
849                                  * Truncate by reducing size.
850                                  */
851 /*********************************************
852    +---------------+
853    | ul_curr       |
854    +---------------+
855            +---------------+
856            |    lock       |
857            +---------------+
858 BECOMES....
859    +-------+
860    |ul_curr|
861    +-------+
862 **********************************************/
863
864                                 ul_curr->size = lock->start - ul_curr->start;
865
866                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
867                                                                 (double)ul_curr->start, (double)ul_curr->size ));
868
869                                 ul_curr = ul_curr->next;
870                 
871                         } else if ( (ul_curr->start < lock->start) &&
872                                                 (ul_curr->start + ul_curr->size > lock->start + lock->size) ) {
873                                 /*
874                                  * Worst case scenario. Unlock request completely overlaps an existing
875                                  * lock range. Split the request into two, push the new (upper) request
876                                  * into the dlink list, and continue with the entry after ul_new (as we
877                                  * know that ul_new will not overlap with this lock).
878                                  */
879 /*********************************************
880         +---------------------------+
881         |       ul_curr             |
882         +---------------------------+
883                 +---------+
884                 | lock    |
885                 +---------+
886 BECOMES.....
887         +-------+         +---------+
888         |ul_curr|         |ul_new   |
889         +-------+         +---------+
890 **********************************************/
891                                 struct unlock_list *ul_new = (struct unlock_list *)talloc(ctx,
892                                                                                                         sizeof(struct unlock_list));
893
894                                 if(ul_new == NULL) {
895                                         DEBUG(0,("posix_unlock_list: talloc fail.\n"));
896                                         return NULL; /* The talloc_destroy takes care of cleanup. */
897                                 }
898
899                                 ZERO_STRUCTP(ul_new);
900                                 ul_new->start = lock->start + lock->size;
901                                 ul_new->size = ul_curr->start + ul_curr->size - ul_new->start;
902
903                                 /* Add into the dlink list after the ul_curr point - NOT at ulhead. */
904                                 DLIST_ADD(ul_curr, ul_new);
905
906                                 /* Truncate the ul_curr. */
907                                 ul_curr->size = lock->start - ul_curr->start;
908
909                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
910 new: start=%.0f,size=%.0f\n", (double)ul_curr->start, (double)ul_curr->size,
911                                                                 (double)ul_new->start, (double)ul_new->size ));
912
913                                 ul_curr = ul_new->next;
914
915                         } else {
916
917                                 /*
918                                  * This logic case should never happen. Ensure this is the
919                                  * case by forcing an abort.... Remove in production.
920                                  */
921
922                                 smb_panic("logic flaw in cases...\n");
923                         }
924                 } /* end for ( ul_curr = ulhead; ul_curr;) */
925         } /* end for (i=0; i<num_locks && ul_head; i++) */
926
927         tdb_unlockchain(posix_lock_tdb, kbuf);
928
929         if (dbuf.dptr)
930                 free(dbuf.dptr);
931         
932         return ulhead;
933 }
934
935 /****************************************************************************
936  POSIX function to release a lock. Returns True if the
937  lock could be released, False if not.
938 ****************************************************************************/
939
940 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
941 {
942         SMB_OFF_T offset;
943         SMB_OFF_T count;
944         BOOL ret = True;
945         TALLOC_CTX *ul_ctx = NULL;
946         struct unlock_list *ulist = NULL;
947         struct unlock_list *ul = NULL;
948
949         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
950                 fsp->fsp_name, (double)u_offset, (double)u_count ));
951
952         /*
953          * If the requested lock won't fit in the POSIX range, we will
954          * pretend it was successful.
955          */
956
957         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
958                 return True;
959
960         /*
961          * We treat this as one unlock request for POSIX accounting purposes even
962          * if it may have been split into multiple smaller POSIX unlock ranges.
963          */ 
964
965         delete_posix_lock_entry(fsp, offset, count);
966
967         if ((ul_ctx = talloc_init()) == NULL) {
968         DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
969                 return True; /* Not a fatal error. */
970         }
971
972         if ((ul = (struct unlock_list *)talloc(ul_ctx, sizeof(struct unlock_list))) == NULL) {
973                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
974                 talloc_destroy(ul_ctx);
975                 return True; /* Not a fatal error. */
976         }
977
978         /*
979          * Create the initial list entry containing the
980          * lock we want to remove.
981          */
982
983         ZERO_STRUCTP(ul);
984         ul->start = offset;
985         ul->size = count;
986
987         DLIST_ADD(ulist, ul);
988
989         /*
990          * The following call calculates if there are any
991          * overlapping locks held by this process on
992          * fd's open on the same file and creates a
993          * list of unlock ranges that will allow
994          * POSIX lock ranges to remain on the file whilst the
995          * unlocks are performed.
996          */
997
998         ulist = posix_unlock_list(ul_ctx, ulist, fsp);
999
1000         /*
1001          * Release the POSIX locks on the list of ranges returned.
1002          */
1003
1004         for(; ulist; ulist = ulist->next) {
1005                 offset = ulist->start;
1006                 count = ulist->size;
1007
1008                 if(u_count == 0) {
1009
1010                         /*
1011                          * This lock must overlap with an existing lock.
1012                          * Don't do any POSIX call.
1013                          */
1014
1015                         continue;
1016                 }
1017
1018                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1019                         (double)offset, (double)count ));
1020
1021                 if (!fcntl_lock(fsp->fd,SMB_F_SETLK,offset,count,F_UNLCK))
1022                         ret = False;
1023         }
1024
1025     talloc_destroy(ul_ctx);
1026
1027         return ret;
1028 }
1029
1030 /****************************************************************************
1031  Remove all lock entries for a specific dev/inode pair from the tdb.
1032 ****************************************************************************/
1033
1034 static void delete_posix_lock_entries(files_struct *fsp)
1035 {
1036         TDB_DATA kbuf = locking_key_fsp(fsp);
1037
1038         tdb_lockchain(posix_lock_tdb, kbuf);
1039         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1040                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1041         tdb_unlockchain(posix_lock_tdb, kbuf);
1042 }
1043
1044 /****************************************************************************
1045  Remove any locks on this fd. Called from file_close().
1046 ****************************************************************************/
1047
1048 void posix_locking_close_file(files_struct *fsp)
1049 {
1050         struct posix_lock *entries = NULL;
1051         size_t count, i;
1052
1053         /*
1054          * Optimization for the common case where we are the only
1055          * opener of a file. If all fd entries are our own, we don't
1056          * need to explicitly release all the locks via the POSIX functions,
1057          * we can just remove all the entries in the tdb and allow the
1058          * close to remove the real locks.
1059          */
1060
1061         count = get_posix_lock_entries(fsp, &entries);
1062
1063         if (count == 0) {
1064                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1065                 return;
1066         }
1067
1068         for (i = 0; i < count; i++) {
1069                 if (entries[i].fd != fsp->fd )
1070                         break;
1071         }
1072
1073         if (i == count) {
1074                 /* All locks are ours. */
1075                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n", 
1076                         fsp->fsp_name, (unsigned int)count ));
1077                 free((char *)entries);
1078                 delete_posix_lock_entries(fsp);
1079                 return;
1080         }
1081
1082         /*
1083          * Difficult case. We need to delete all our locks, whilst leaving
1084          * all other POSIX locks in place.
1085          */
1086
1087         for (i = 0; i < count; i++) {
1088                 struct posix_lock *pl = &entries[i];
1089                 release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1090         }
1091         free((char *)entries);
1092 }
1093
1094 /*******************************************************************
1095  Create the in-memory POSIX lock databases.
1096 ********************************************************************/
1097
1098 BOOL posix_locking_init(void)
1099 {
1100         if (posix_lock_tdb && posix_pending_close_tdb)
1101                 return True;
1102
1103         if (!posix_lock_tdb)
1104                 posix_lock_tdb = tdb_open(NULL, 0, TDB_CLEAR_IF_FIRST,
1105                     O_RDWR|O_CREAT, 0644);
1106     if (!posix_lock_tdb) {
1107         DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1108                 return False;
1109     }
1110         if (!posix_pending_close_tdb)
1111                 posix_pending_close_tdb = tdb_open(NULL, 0, TDB_CLEAR_IF_FIRST,
1112                     O_RDWR|O_CREAT, 0644);
1113     if (!posix_pending_close_tdb) {
1114         DEBUG(0,("Failed to open POSIX pending close database.\n"));
1115                 return False;
1116     }
1117
1118         return True;
1119 }
1120
1121 /*******************************************************************
1122  Delete the in-memory POSIX lock databases.
1123 ********************************************************************/
1124
1125 BOOL posix_locking_end(void)
1126 {
1127     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1128                 return False;
1129     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1130                 return False;
1131         return True;
1132 }