Fix for debug statement crash.
[kamenim/samba-autobuild/.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/Netbios implementation.
3    Version 3.0
4    Locking functions
5    Copyright (C) Jeremy Allison 1992-2000
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
21    Revision History:
22
23    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 */
25
26 #include "includes.h"
/* Defined elsewhere; presumably consulted by the DEBUG()/DEBUGLVL() macros - TODO confirm. */
extern int DEBUGLEVEL;
/* Defined elsewhere; presumably the SMB process id of the current request - TODO confirm. */
extern int global_smbpid;

/*
 * The POSIX locking database handle.
 * Records are keyed by a struct posix_lock_key (dev/inode) and hold an
 * array of struct posix_lock. The handle is opened outside the code
 * visible here.
 */

static TDB_CONTEXT *posix_lock_tdb;

/*
 * The pending close database handle.
 * Records are keyed by a struct posix_lock_key (dev/inode) and hold an
 * array of int file descriptors whose close() has been deferred.
 */

static TDB_CONTEXT *posix_pending_close_tdb;
41
42 /*
43  * The data in POSIX lock records is an unsorted linear array of these
44  * records.  It is unnecessary to store the count as tdb provides the
45  * size of the record.
46  */
47
/* One lock request as stored in the POSIX locking tdb. */
struct posix_lock {
        int fd;             /* fsp->fd of the open file the lock was requested on. */
        SMB_OFF_T start;    /* First byte of the locked range. */
        SMB_OFF_T size;     /* Length of the locked range. */
        int lock_type;      /* POSIX lock type as passed in by the caller (F_RDLCK or F_WRLCK). */
};
54
55 /*
56  * The data in POSIX pending close records is an unsorted linear array of int
57  * records.  It is unnecessary to store the count as tdb provides the
58  * size of the record.
59  */
60
61 /* The key used in both the POSIX databases. */
62
/* Identifies a file by device and inode; filled in from fsp->dev / fsp->inode. */
struct posix_lock_key {
        SMB_DEV_T device;   /* Device number of the file. */
        SMB_INO_T inode;    /* Inode number of the file. */
}; 
67
68 /*******************************************************************
69  Form a static locking key for a dev/inode pair.
70 ******************************************************************/
71
72 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
73 {
74         static struct posix_lock_key key;
75         TDB_DATA kbuf;
76         key.device = dev;
77         key.inode = inode;
78         kbuf.dptr = (char *)&key;
79         kbuf.dsize = sizeof(key);
80         return kbuf;
81 }
82
83 /*******************************************************************
84  Convenience function to get a key from an fsp.
85 ******************************************************************/
86
87 static TDB_DATA locking_key_fsp(files_struct *fsp)
88 {
89         return locking_key(fsp->dev, fsp->inode);
90 }
91
92 /****************************************************************************
93  Add an fd to the pending close tdb.
94 ****************************************************************************/
95
96 static BOOL add_fd_to_close_entry(files_struct *fsp)
97 {
98         TDB_DATA kbuf = locking_key_fsp(fsp);
99         TDB_DATA dbuf;
100
101         dbuf.dptr = NULL;
102
103         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
104
105         dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
106         if (!dbuf.dptr) {
107                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
108                 return False;
109         }
110         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
111         dbuf.dsize += sizeof(int);
112
113         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
114                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
115         }
116
117         free(dbuf.dptr);
118         return True;
119 }
120
121 /****************************************************************************
122  Remove all fd entries for a specific dev/inode pair from the tdb.
123 ****************************************************************************/
124
125 static void delete_close_entries(files_struct *fsp)
126 {
127         TDB_DATA kbuf = locking_key_fsp(fsp);
128
129         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
130                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
131 }
132
133 /****************************************************************************
134  Get the array of POSIX pending close records for an open fsp. Caller must
135  free. Returns number of entries.
136 ****************************************************************************/
137
138 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
139 {
140         TDB_DATA kbuf = locking_key_fsp(fsp);
141         TDB_DATA dbuf;
142         size_t count = 0;
143
144         *entries = NULL;
145         dbuf.dptr = NULL;
146
147         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
148
149     if (!dbuf.dptr) {
150                 return 0;
151         }
152
153         *entries = (int *)dbuf.dptr;
154         count = (size_t)(dbuf.dsize / sizeof(int));
155
156         return count;
157 }
158
159 /****************************************************************************
160  Get the array of POSIX locks for an fsp. Caller must free. Returns
161  number of entries.
162 ****************************************************************************/
163
164 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
165 {
166         TDB_DATA kbuf = locking_key_fsp(fsp);
167         TDB_DATA dbuf;
168         size_t count = 0;
169
170         *entries = NULL;
171
172         dbuf.dptr = NULL;
173
174         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
175
176     if (!dbuf.dptr) {
177                 return 0;
178         }
179
180         *entries = (struct posix_lock *)dbuf.dptr;
181         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
182
183         return count;
184 }
185
186 /****************************************************************************
187  Deal with pending closes needed by POSIX locking support.
188  Note that posix_locking_close_file() is expected to have been called
189  to delete all locks on this fsp before this function is called.
190 ****************************************************************************/
191
192 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
193 {
194         int saved_errno = 0;
195         int ret;
196         size_t count, i;
197         struct posix_lock *entries = NULL;
198         int *fd_array = NULL;
199         BOOL locks_on_other_fds = False;
200
201         if (!lp_posix_locking(SNUM(conn))) {
202                 /*
203                  * No POSIX to worry about, just close.
204                  */
205                 ret = conn->vfs_ops.close(fsp->fd);
206                 fsp->fd = -1;
207                 return ret;
208         }
209
210         /*
211          * Get the number of outstanding POSIX locks on this dev/inode pair.
212          */
213
214         count = get_posix_lock_entries(fsp, &entries);
215
216         /*
217          * Check if there are any outstanding locks belonging to
218          * other fd's. This should never be the case if posix_locking_close_file()
219          * has been called first, but it never hurts to be *sure*.
220          */
221
222         for (i = 0; i < count; i++) {
223                 if (entries[i].fd != fsp->fd) {
224                         locks_on_other_fds = True;
225                         break;
226                 }
227         }
228
229         if (locks_on_other_fds) {
230
231                 /*
232                  * There are outstanding locks on this dev/inode pair on other fds.
233                  * Add our fd to the pending close tdb and set fsp->fd to -1.
234                  */
235
236                 if (!add_fd_to_close_entry(fsp)) {
237                         free((char *)entries);
238                         return False;
239                 }
240
241                 free((char *)entries);
242                 fsp->fd = -1;
243                 return 0;
244         }
245
246         if(entries)
247                 free((char *)entries);
248
249         /*
250          * No outstanding POSIX locks. Get the pending close fd's
251          * from the tdb and close them all.
252          */
253
254         count = get_posix_pending_close_entries(fsp, &fd_array);
255
256         if (count) {
257                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
258
259                 for(i = 0; i < count; i++) {
260                         if (conn->vfs_ops.close(fd_array[i]) == -1) {
261                                 saved_errno = errno;
262                         }
263                 }
264
265                 /*
266                  * Delete all fd's stored in the tdb
267                  * for this dev/inode pair.
268                  */
269
270                 delete_close_entries(fsp);
271         }
272
273         if (fd_array)
274                 free((char *)fd_array);
275
276         /*
277          * Finally close the fd associated with this fsp.
278          */
279
280         ret = conn->vfs_ops.close(fsp->fd);
281
282         if (saved_errno != 0) {
283         errno = saved_errno;
284                 ret = -1;
285     } 
286
287         fsp->fd = -1;
288
289         return ret;
290 }
291
/****************************************************************************
 Debugging aid: printable name for a POSIX lock type.
 F_RDLCK maps to "READ"; every other value maps to "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
        if (lock_type == F_RDLCK)
                return "READ";

        return "WRITE";
}
300
301 /****************************************************************************
302  Add an entry into the POSIX locking tdb. Returns the number of records that
303  match the given start and size, or -1 on error.
304 ****************************************************************************/
305
306 static int add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type)
307 {
308         TDB_DATA kbuf = locking_key_fsp(fsp);
309         TDB_DATA dbuf;
310         struct posix_lock pl;
311         struct posix_lock *entries;
312         size_t i, count;
313         int num_records = 0;
314
315         /*
316          * Windows is very strange. It allows read locks to be overlayed on 
317          * a write lock, but leaves the write lock in force until the first
318          * unlock. It also reference counts the locks. This means the following sequence :
319          *
320          * process1                                      process2
321          * ------------------------------------------------------------------------
322          * WRITE LOCK : start = 0, len = 10
323          *                                            READ LOCK: start =0, len = 10 - FAIL
324          * READ LOCK : start = 0, len = 10
325          *                                            READ LOCK: start =0, len = 10 - FAIL
326          * UNLOCK : start = 0, len = 10
327          *                                            READ LOCK: start =0, len = 10 - OK
328          *
329          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
330          * would leave a single read lock over the 0-10 region. In order to
331          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
332          * entries, one for each overlayed lock request. We are guarenteed by the brlock
333          * semantics that if a write lock is added, then it will be first in the array.
334          */
335         
336         dbuf.dptr = NULL;
337
338         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
339
340         /*
341          * New record.
342          */
343
344         pl.fd = fsp->fd;
345         pl.start = start;
346         pl.size = size;
347         pl.lock_type = lock_type;
348
349         dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
350         if (!dbuf.dptr) {
351                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
352                 goto fail;
353         }
354
355         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
356         dbuf.dsize += sizeof(pl);
357
358         count = (size_t)(dbuf.dsize / sizeof(pl));
359         entries = (struct posix_lock *)dbuf.dptr;
360
361         for (i = 0; i < count; i++) {
362                 struct posix_lock *entry = &entries[i];
363
364                 if (fsp->fd == entry->fd &&
365                         start == entry->start &&
366                         size == entry->size)
367                         num_records++;
368
369         }
370
371         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
372                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
373                 goto fail;
374         }
375
376     free(dbuf.dptr);
377
378         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: num_records = %d : dev=%.0f inode=%.0f\n",
379                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size, num_records,
380                         (double)fsp->dev, (double)fsp->inode ));
381
382     return num_records;
383
384  fail:
385     if (dbuf.dptr)
386                 free(dbuf.dptr);
387     return -1;
388 }
389
390 /****************************************************************************
391  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
392  deleted and the number of remaining matching records, or -1 on error.
393 ****************************************************************************/
394
395 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
396 {
397         TDB_DATA kbuf = locking_key_fsp(fsp);
398         TDB_DATA dbuf;
399         struct posix_lock *locks;
400         size_t i, count;
401         int num_records = 0;
402
403         dbuf.dptr = NULL;
404         
405         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
406
407         if (!dbuf.dptr) {
408                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
409                 goto fail;
410         }
411
412         /* There are existing locks - find a match. */
413         locks = (struct posix_lock *)dbuf.dptr;
414         count = (size_t)(dbuf.dsize / sizeof(*locks));
415
416         /*
417          * Count the number of entries that match this
418          * unlock request.
419          */
420
421         for (i = 0; i < count; i++) {
422                 struct posix_lock *entry = &locks[i];
423
424                 if (entry->fd == fsp->fd &&
425                         entry->start == start &&
426                         entry->size == size) {
427                                 num_records++;
428                 }
429         }
430
431         for (i=0; i<count; i++) { 
432                 struct posix_lock *entry = &locks[i];
433
434                 if (entry->fd == fsp->fd &&
435                         entry->start == start &&
436                         entry->size == size) {
437
438                         num_records--; /* We're deleting one. */
439
440                         /* Make a copy if requested. */
441                         if (pl)
442                                 *pl = *entry;
443
444                         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
445                                         posix_lock_type_name(entry->lock_type), (double)entry->start, (double)entry->size,
446                                         (unsigned int)num_records ));
447
448                         /* Found it - delete it. */
449                         if (count == 1) {
450                                 tdb_delete(posix_lock_tdb, kbuf);
451                         } else {
452                                 if (i < count-1) {
453                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
454                                 }
455                                 dbuf.dsize -= sizeof(*locks);
456                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
457                         }
458
459                         free(dbuf.dptr);
460                         return num_records;
461                 }
462         }
463
464         /* We didn't find it. */
465
466  fail:
467     if (dbuf.dptr)
468                 free(dbuf.dptr);
469     return -1;
470 }
471
472 /****************************************************************************
473  Utility function to map a lock type correctly depending on the open
474  mode of a file.
475 ****************************************************************************/
476
477 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
478 {
479         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
480                 /*
481                  * Many UNIX's cannot get a write lock on a file opened read-only.
482                  * Win32 locking semantics allow this.
483                  * Do the best we can and attempt a read-only lock.
484                  */
485                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
486                 return F_RDLCK;
487         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
488                 /*
489                  * Ditto for read locks on write only files.
490                  */
491                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
492                 return F_WRLCK;
493         }
494
495   /*
496    * This return should be the most normal, as we attempt
497    * to always open files read/write.
498    */
499
500   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
501 }
502
503 /****************************************************************************
504  Check to see if the given unsigned lock range is within the possible POSIX
505  range. Modifies the given args to be in range if possible, just returns
506  False if not.
507 ****************************************************************************/
508
509 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
510                                                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
511 {
512         SMB_OFF_T offset;
513         SMB_OFF_T count;
514
515 #if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
516
517     SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
518     SMB_OFF_T mask = (mask2<<1);
519     SMB_OFF_T neg_mask = ~mask;
520
521         /*
522          * In this case SMB_OFF_T is 64 bits,
523          * and the underlying system can handle 64 bit signed locks.
524          * Cast to signed type.
525          */
526
527         offset = (SMB_OFF_T)u_offset;
528         count = (SMB_OFF_T)u_count;
529
530         /*
531          * Deal with a very common case of count of all ones.
532          * (lock entire file).
533          */
534
535         if(count == (SMB_OFF_T)-1)
536                 count &= ~mask;
537
538         /*
539          * POSIX lock ranges cannot be negative.
540          * Fail if any combination becomes negative.
541          */
542
543         if(offset < 0 || count < 0 || (offset + count < 0)) {
544                 DEBUG(10,("posix_lock_in_range: negative range: offset = %.0f, count = %.0f. Ignoring lock.\n",
545                                 (double)offset, (double)count ));
546                 return False;
547         }
548
549         /*
550          * In this case SMB_OFF_T is 64 bits, the offset and count
551          * fit within the positive range, and the underlying
552          * system can handle 64 bit locks. Just return as the
553          * cast values are ok.
554          */
555
556 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
557
558         /*
559          * In this case either SMB_OFF_T is 32 bits,
560          * or the underlying system cannot handle 64 bit signed locks.
561          * Either way we have to try and mangle to fit within 31 bits.
562          * This is difficult.
563          */
564
565 #if defined(HAVE_BROKEN_FCNTL64_LOCKS)
566
567         /*
568          * SMB_OFF_T is 64 bits, but we need to use 31 bits due to
569          * broken large locking.
570          */
571
572         /*
573          * Deal with a very common case of count of all ones.
574          * (lock entire file).
575          */
576
577         if(u_count == (SMB_BIG_UINT)-1)
578                 count = 0x7FFFFFFF;
579
580         if(((u_offset >> 32) & 0xFFFFFFFF) || ((u_count >> 32) & 0xFFFFFFFF)) {
581                 DEBUG(10,("posix_lock_in_range: top 32 bits not zero. offset = %.0f, count = %.0f. Ignoring lock.\n",
582                                 (double)u_offset, (double)u_count ));
583                 /* Top 32 bits of offset or count were not zero. */
584                 return False;
585         }
586
587         /* Cast from 64 bits unsigned to 64 bits signed. */
588         offset = (SMB_OFF_T)u_offset;
589         count = (SMB_OFF_T)u_count;
590
591         /*
592          * Check if we are within the 2^31 range.
593          */
594
595         {
596                 int32 low_offset = (int32)offset;
597                 int32 low_count = (int32)count;
598
599                 if(low_offset < 0 || low_count < 0 || (low_offset + low_count < 0)) {
600                         DEBUG(10,("posix_lock_in_range: not within 2^31 range. low_offset = %d, low_count = %d. Ignoring lock.\n",
601                                         low_offset, low_count ));
602                         return False;
603                 }
604         }
605
606         /*
607          * Ok - we can map from a 64 bit number to a 31 bit lock.
608          */
609
610 #else /* HAVE_BROKEN_FCNTL64_LOCKS */
611
612         /*
613          * SMB_OFF_T is 32 bits.
614          */
615
616 #if defined(HAVE_LONGLONG)
617
618         /*
619          * SMB_BIG_UINT is 64 bits, we can do a 32 bit shift.
620          */
621
622         /*
623          * Deal with a very common case of count of all ones.
624          * (lock entire file).
625          */
626
627         if(u_count == (SMB_BIG_UINT)-1)
628                 count = 0x7FFFFFFF;
629
630         if(((u_offset >> 32) & 0xFFFFFFFF) || ((u_count >> 32) & 0xFFFFFFFF)) {
631                 DEBUG(10,("posix_lock_in_range: top 32 bits not zero. u_offset = %.0f, u_count = %.0f. Ignoring lock.\n",
632                                 (double)u_offset, (double)u_count ));
633                 return False;
634         }
635
636         /* Cast from 64 bits unsigned to 32 bits signed. */
637         offset = (SMB_OFF_T)u_offset;
638         count = (SMB_OFF_T)u_count;
639
640         /*
641          * Check if we are within the 2^31 range.
642          */
643
644         if(offset < 0 || count < 0 || (offset + count < 0)) {
645                 DEBUG(10,("posix_lock_in_range: not within 2^31 range. offset = %d, count = %d. Ignoring lock.\n",
646                                 (int)offset, (int)count ));
647                 return False;
648         }
649
650 #else /* HAVE_LONGLONG */
651
652         /*
653          * SMB_BIG_UINT and SMB_OFF_T are both 32 bits,
654          * just cast.
655          */
656
657         /*
658          * Deal with a very common case of count of all ones.
659          * (lock entire file).
660          */
661
662         if(u_count == (SMB_BIG_UINT)-1)
663                 count = 0x7FFFFFFF;
664
665         /* Cast from 32 bits unsigned to 32 bits signed. */
666         offset = (SMB_OFF_T)u_offset;
667         count = (SMB_OFF_T)u_count;
668
669         /*
670          * Check if we are within the 2^31 range.
671          */
672
673         if(offset < 0 || count < 0 || (offset + count < 0)) {
674                 DEBUG(10,("posix_lock_in_range: not within 2^31 range. offset = %d, count = %d. Ignoring lock.\n",
675                                 (int)offset, (int)count ));
676                 return False;
677         }
678
679 #endif /* HAVE_LONGLONG */
680 #endif /* LARGE_SMB_OFF_T */
681 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
682
683         /*
684          * The mapping was successful.
685          */
686
687         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
688                         (double)offset, (double)count ));
689
690         *offset_out = offset;
691         *count_out = count;
692         
693         return True;
694 }
695
#if defined(LARGE_SMB_OFF_T)
/****************************************************************************
 Pathetically try and map a 64 bit lock offset into 31 bits. I hate Windows :-).
 high and low are the two 32 bit halves of the offset. The significant
 bits of high are moved to the top of a 31 bit value and OR-ed with low.
 Returns 0 when the two halves would overlap or do not fit in 31 bits.
****************************************************************************/

static uint32 map_lock_offset(uint32 high, uint32 low)
{
        unsigned int i;
        uint32 mask = 0;
        uint32 highcopy = high;

        /*
         * Try and find out how many significant bits there are in high.
         */

        for(i = 0; highcopy; i++)
                highcopy >>= 1;

        /*
         * We use 31 bits not 32 here as POSIX
         * lock offsets may not be negative.
         * If high needs more than 31 bits there is no room at all, and
         * (31 - i) would be a negative shift count.
         */

        if(i > 31)
                return 0; /* Fail. */

        /* Shift an unsigned all-ones value; shifting a negative int is undefined. */
        mask = (~(uint32)0) << (31 - i);

        /* low must not use any of the bits high is about to occupy. */
        if(low & mask)
                return 0; /* Fail. */

        high <<= (31 - i);

        return (high|low);
}
#endif
729
730 /****************************************************************************
731  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
732  broken NFS implementations.
733 ****************************************************************************/
734
735 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
736 {
737         int ret;
738         struct connection_struct *conn = fsp->conn;
739
740 #if defined(LARGE_SMB_OFF_T)
741         /*
742          * In the 64 bit locking case we store the original
743          * values in case we have to map to a 32 bit lock on
744          * a filesystem that doesn't support 64 bit locks.
745          */
746         SMB_OFF_T orig_offset = offset;
747         SMB_OFF_T orig_count = count;
748 #endif /* LARGE_SMB_OFF_T */
749
750         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
751
752         ret = conn->vfs_ops.lock(fsp->fd,op,offset,count,type);
753
754         if (!ret && (errno == EFBIG)) {
755                 if( DEBUGLVL( 0 )) {
756                         dbgtext("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n", (double)offset,(double)count);
757                         dbgtext("a 'file too large' error. This can happen when using 64 bit lock offsets\n");
758                         dbgtext("on 32 bit NFS mounted file systems. Retrying with 32 bit truncated length.\n");
759                 }
760                 /* 32 bit NFS file system, retry with smaller offset */
761                 errno = 0;
762                 count &= 0x7fffffff;
763                 ret = conn->vfs_ops.lock(fsp->fd,op,offset,count,type);
764         }
765
766         /* A lock query - just return. */
767         if (op == SMB_F_GETLK)
768                 return ret;
769
770         /* A lock set or unset. */
771         if (!ret) {
772                 DEBUG(3,("posix_fcntl_lock: lock failed at offset %.0f count %.0f op %d type %d (%s)\n",
773                                 (double)offset,(double)count,op,type,strerror(errno)));
774
775                 /* Perhaps it doesn't support this sort of locking ? */
776                 if (errno == EINVAL) {
777 #if defined(LARGE_SMB_OFF_T)
778                         {
779                                 /*
780                                  * Ok - if we get here then we have a 64 bit lock request
781                                  * that has returned EINVAL. Try and map to 31 bits for offset
782                                  * and length and try again. This may happen if a filesystem
783                                  * doesn't support 64 bit offsets (efs/ufs) although the underlying
784                                  * OS does.
785                                  */
786                                 uint32 off_low = (orig_offset & 0xFFFFFFFF);
787                                 uint32 off_high = ((orig_offset >> 32) & 0xFFFFFFFF);
788
789                                 count = (orig_count & 0x7FFFFFFF);
790                                 offset = (SMB_OFF_T)map_lock_offset(off_high, off_low);
791                                 ret = conn->vfs_ops.lock(fsp->fd,op,offset,count,type);
792                                 if (!ret) {
793                                         if (errno == EINVAL) {
794                                                 DEBUG(3,("posix_fcntl_lock: locking not supported? returning True\n"));
795                                                 return(True);
796                                         }
797                                         return False;
798                                 }
799                                 DEBUG(3,("posix_fcntl_lock: 64 -> 32 bit modified lock call successful\n"));
800                                 return True;
801                         }
802 #else /* LARGE_SMB_OFF_T */
803                         DEBUG(3,("locking not supported? returning True\n"));
804                         return(True);
805 #endif /* LARGE_SMB_OFF_T */
806                 }
807
808                 return(False);
809         }
810
811         DEBUG(8,("posix_fcntl_lock: Lock call successful\n"));
812
813         return(True);
814 }
815
816 /****************************************************************************
817  POSIX function to see if a file region is locked. Returns True if the
818  region is locked, False otherwise.
819 ****************************************************************************/
820
821 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
822 {
823         SMB_OFF_T offset;
824         SMB_OFF_T count;
825         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
826
827         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
828                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
829
830         /*
831          * If the requested lock won't fit in the POSIX range, we will
832          * never set it, so presume it is not locked.
833          */
834
835         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
836                 return False;
837
838         /*
839          * Note that most UNIX's can *test* for a write lock on
840          * a read-only fd, just not *set* a write lock on a read-only
841          * fd. So we don't need to use map_lock_type here.
842          */ 
843
844         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
845 }
846
847 /****************************************************************************
848  POSIX function to acquire a lock. Returns True if the
849  lock could be granted, False if not.
850 ****************************************************************************/
851
852 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
853 {
854         SMB_OFF_T offset;
855         SMB_OFF_T count;
856         BOOL ret = True;
857         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
858         int ref_count;
859
860         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
861                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
862
863         /*
864          * If the requested lock won't fit in the POSIX range, we will
865          * pretend it was successful.
866          */
867
868         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
869                 return True;
870
871         /*
872          * Note that setting multiple overlapping locks on different
873          * file descriptors will not be held separately by the kernel (POSIX
874          * braindamage), but will be merged into one continuous lock
875          * range. We cope with this case in the release_posix_lock code
876          * below. We need to add the posix lock entry into the tdb before
877          * doing the real posix lock call to deal with the locking overlay
878          * case described above in add_posix_lock_entry().
879          */
880
881         ref_count = add_posix_lock_entry(fsp,offset,count,posix_lock_type);
882
883         if (ref_count == 1) {
884                 /*
885                  * First lock entry created. Do a real POSIX lock.
886                  */
887             ret = posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type);
888
889                 /*
890                  * Oops, POSIX lock failed, delete the tdb entry.
891                  */
892                 if (!ret)
893                         delete_posix_lock_entry(fsp,offset,count,NULL);
894         }
895
896         return ret;
897 }
898
899 /*
900  * Structure used when splitting a lock range
901  * into a POSIX lock range. Doubly linked list.
902  */
903
904 struct unlock_list {
905     struct unlock_list *next;
906     struct unlock_list *prev;
907     SMB_OFF_T start;
908     SMB_OFF_T size;
909 };
910
911 /****************************************************************************
912  Create a list of lock ranges that don't overlap a given range. Used in calculating
913  POSIX lock unlocks. This is a difficult function that requires ASCII art to
914  understand it :-).
915 ****************************************************************************/
916
917 static struct unlock_list *posix_unlock_list(TALLOC_CTX *ctx, struct unlock_list *ulhead, files_struct *fsp)
918 {
919         TDB_DATA kbuf = locking_key_fsp(fsp);
920         TDB_DATA dbuf;
921         struct posix_lock *locks;
922         size_t num_locks, i;
923
924         dbuf.dptr = NULL;
925
926         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
927
928         if (!dbuf.dptr) {
929                 return ulhead;
930         }
931         
932         locks = (struct posix_lock *)dbuf.dptr;
933         num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
934
935         /*
936          * Check the current lock list on this dev/inode pair.
937          * Quit if the list is deleted.
938          */
939
940         DEBUG(10,("posix_unlock_list: curr: start=%.0f,size=%.0f\n",
941                 (double)ulhead->start, (double)ulhead->size ));
942
943         for (i=0; i<num_locks && ulhead; i++) {
944
945                 struct posix_lock *lock = &locks[i];
946                 struct unlock_list *ul_curr;
947
948                 /*
949                  * Walk the unlock list, checking for overlaps. Note that
950                  * the unlock list can expand within this loop if the current
951                  * range being examined needs to be split.
952                  */
953
954                 for (ul_curr = ulhead; ul_curr;) {
955
956                         DEBUG(10,("posix_unlock_list: lock: start=%.0f,size=%.0f:",
957                                 (double)lock->start, (double)lock->size ));
958
959                         if ( (ul_curr->start >= (lock->start + lock->size)) ||
960                                  (lock->start >= (ul_curr->start + ul_curr->size))) {
961
962                                 /* No overlap with this lock - leave this range alone. */
963 /*********************************************
964                                              +---------+
965                                              | ul_curr |
966                                              +---------+
967                                 +-------+
968                                 | lock  |
969                                 +-------+
970 OR....
971              +---------+
972              | ul_curr |
973              +---------+
974 **********************************************/
975
976                                 DEBUG(10,("no overlap case.\n" ));
977
978                                 ul_curr = ul_curr->next;
979
980                         } else if ( (ul_curr->start >= lock->start) &&
981                                                 (ul_curr->start + ul_curr->size <= lock->start + lock->size) ) {
982
983                                 /*
984                                  * This unlock is completely overlapped by this existing lock range
985                                  * and thus should have no effect (not be unlocked). Delete it from the list.
986                                  */
987 /*********************************************
988                 +---------+
989                 | ul_curr |
990                 +---------+
991         +---------------------------+
992         |       lock                |
993         +---------------------------+
994 **********************************************/
995                                 /* Save the next pointer */
996                                 struct unlock_list *ul_next = ul_curr->next;
997
998                                 DEBUG(10,("delete case.\n" ));
999
1000                                 DLIST_REMOVE(ulhead, ul_curr);
1001                                 if(ulhead == NULL)
1002                                         break; /* No more list... */
1003
1004                                 ul_curr = ul_next;
1005                                 
1006                         } else if ( (ul_curr->start >= lock->start) &&
1007                                                 (ul_curr->start < lock->start + lock->size) &&
1008                                                 (ul_curr->start + ul_curr->size > lock->start + lock->size) ) {
1009
1010                                 /*
1011                                  * This unlock overlaps the existing lock range at the high end.
1012                                  * Truncate by moving start to existing range end and reducing size.
1013                                  */
1014 /*********************************************
1015                 +---------------+
1016                 | ul_curr       |
1017                 +---------------+
1018         +---------------+
1019         |    lock       |
1020         +---------------+
1021 BECOMES....
1022                         +-------+
1023                         |ul_curr|
1024                         +-------+
1025 **********************************************/
1026
1027                                 ul_curr->size = (ul_curr->start + ul_curr->size) - (lock->start + lock->size);
1028                                 ul_curr->start = lock->start + lock->size;
1029
1030                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
1031                                                                 (double)ul_curr->start, (double)ul_curr->size ));
1032
1033                                 ul_curr = ul_curr->next;
1034
1035                         } else if ( (ul_curr->start < lock->start) &&
1036                                                 (ul_curr->start + ul_curr->size > lock->start) ) {
1037
1038                                 /*
1039                                  * This unlock overlaps the existing lock range at the low end.
1040                                  * Truncate by reducing size.
1041                                  */
1042 /*********************************************
1043    +---------------+
1044    | ul_curr       |
1045    +---------------+
1046            +---------------+
1047            |    lock       |
1048            +---------------+
1049 BECOMES....
1050    +-------+
1051    |ul_curr|
1052    +-------+
1053 **********************************************/
1054
1055                                 ul_curr->size = lock->start - ul_curr->start;
1056
1057                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
1058                                                                 (double)ul_curr->start, (double)ul_curr->size ));
1059
1060                                 ul_curr = ul_curr->next;
1061                 
1062                         } else if ( (ul_curr->start < lock->start) &&
1063                                                 (ul_curr->start + ul_curr->size > lock->start + lock->size) ) {
1064                                 /*
1065                                  * Worst case scenario. Unlock request completely overlaps an existing
1066                                  * lock range. Split the request into two, push the new (upper) request
1067                                  * into the dlink list, and continue with the entry after ul_new (as we
1068                                  * know that ul_new will not overlap with this lock).
1069                                  */
1070 /*********************************************
1071         +---------------------------+
1072         |       ul_curr             |
1073         +---------------------------+
1074                 +---------+
1075                 | lock    |
1076                 +---------+
1077 BECOMES.....
1078         +-------+         +---------+
1079         |ul_curr|         |ul_new   |
1080         +-------+         +---------+
1081 **********************************************/
1082                                 struct unlock_list *ul_new = (struct unlock_list *)talloc(ctx,
1083                                                                                                         sizeof(struct unlock_list));
1084
1085                                 if(ul_new == NULL) {
1086                                         DEBUG(0,("posix_unlock_list: talloc fail.\n"));
1087                                         return NULL; /* The talloc_destroy takes care of cleanup. */
1088                                 }
1089
1090                                 ZERO_STRUCTP(ul_new);
1091                                 ul_new->start = lock->start + lock->size;
1092                                 ul_new->size = ul_curr->start + ul_curr->size - ul_new->start;
1093
1094                                 /* Add into the dlink list after the ul_curr point - NOT at ulhead. */
1095                                 DLIST_ADD(ul_curr, ul_new);
1096
1097                                 /* Truncate the ul_curr. */
1098                                 ul_curr->size = lock->start - ul_curr->start;
1099
1100                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
1101 new: start=%.0f,size=%.0f\n", (double)ul_curr->start, (double)ul_curr->size,
1102                                                                 (double)ul_new->start, (double)ul_new->size ));
1103
1104                                 ul_curr = ul_new->next;
1105
1106                         } else {
1107
1108                                 /*
1109                                  * This logic case should never happen. Ensure this is the
1110                                  * case by forcing an abort.... Remove in production.
1111                                  */
1112                                 pstring msg;
1113
1114                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: ul_curr: start = %.0f, size = %.0f : \
1115 lock: start = %.0f, size = %.0f\n", (double)ul_curr->start, (double)ul_curr->size, (double)lock->start, (double)lock->size );
1116
1117                                 smb_panic(msg);
1118                         }
1119                 } /* end for ( ul_curr = ulhead; ul_curr;) */
1120         } /* end for (i=0; i<num_locks && ul_head; i++) */
1121
1122         if (dbuf.dptr)
1123                 free(dbuf.dptr);
1124         
1125         return ulhead;
1126 }
1127
1128 /****************************************************************************
1129  POSIX function to release a lock. Returns True if the
1130  lock could be released, False if not.
1131 ****************************************************************************/
1132
1133 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1134 {
1135         SMB_OFF_T offset;
1136         SMB_OFF_T count;
1137         BOOL ret = True;
1138         TALLOC_CTX *ul_ctx = NULL;
1139         struct unlock_list *ulist = NULL;
1140         struct unlock_list *ul = NULL;
1141         struct posix_lock deleted_lock;
1142         int num_entries;
1143
1144         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1145                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1146
1147         /*
1148          * If the requested lock won't fit in the POSIX range, we will
1149          * pretend it was successful.
1150          */
1151
1152         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1153                 return True;
1154
1155         /*
1156          * We treat this as one unlock request for POSIX accounting purposes even
1157          * if it may have been split into multiple smaller POSIX unlock ranges.
1158          */ 
1159
1160         num_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1161
1162         if (num_entries == -1) {
1163         smb_panic("release_posix_lock: unable find entry to delete !\n");
1164         }
1165
1166         /*
1167          * If num_entries is > 0, and the lock_type we just deleted from the tdb was
1168          * a POSIX write lock, then rather than doing an unlock we need to downgrade
1169          * the POSIX lock to a read lock.
1170          */
1171
1172         if (num_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1173                 return posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK);
1174         }
1175
1176         /*
1177          * Only do the POSIX unlock when the num_entries is now zero.
1178          */
1179
1180         if (num_entries > 0) {
1181                 DEBUG(10, ("release_posix_lock: num_entries = %d\n", num_entries ));
1182                 return True;
1183         }
1184
1185         if ((ul_ctx = talloc_init()) == NULL) {
1186         DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1187                 return True; /* Not a fatal error. */
1188         }
1189
1190         if ((ul = (struct unlock_list *)talloc(ul_ctx, sizeof(struct unlock_list))) == NULL) {
1191                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1192                 talloc_destroy(ul_ctx);
1193                 return True; /* Not a fatal error. */
1194         }
1195
1196         /*
1197          * Create the initial list entry containing the
1198          * lock we want to remove.
1199          */
1200
1201         ZERO_STRUCTP(ul);
1202         ul->start = offset;
1203         ul->size = count;
1204
1205         DLIST_ADD(ulist, ul);
1206
1207         /*
1208          * The following call calculates if there are any
1209          * overlapping locks held by this process on
1210          * fd's open on the same file and creates a
1211          * list of unlock ranges that will allow
1212          * POSIX lock ranges to remain on the file whilst the
1213          * unlocks are performed.
1214          */
1215
1216         ulist = posix_unlock_list(ul_ctx, ulist, fsp);
1217
1218         /*
1219          * Release the POSIX locks on the list of ranges returned.
1220          */
1221
1222         for(; ulist; ulist = ulist->next) {
1223                 offset = ulist->start;
1224                 count = ulist->size;
1225
1226                 if(u_count == 0) {
1227
1228                         /*
1229                          * This lock must overlap with an existing lock.
1230                          * Don't do any POSIX call.
1231                          */
1232
1233                         continue;
1234                 }
1235
1236                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1237                         (double)offset, (double)count ));
1238
1239                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1240                         ret = False;
1241         }
1242
1243         talloc_destroy(ul_ctx);
1244
1245         return ret;
1246 }
1247
1248 /****************************************************************************
1249  Remove all lock entries for a specific dev/inode pair from the tdb.
1250 ****************************************************************************/
1251
1252 static void delete_posix_lock_entries(files_struct *fsp)
1253 {
1254         TDB_DATA kbuf = locking_key_fsp(fsp);
1255
1256         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1257                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1258 }
1259
1260 /****************************************************************************
1261  Debug function.
1262 ****************************************************************************/
1263
1264 static void dump_entry(struct posix_lock *pl)
1265 {
1266         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1267                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1268 }
1269
1270 /****************************************************************************
1271  Remove any locks on this fd. Called from file_close().
1272 ****************************************************************************/
1273
1274 void posix_locking_close_file(files_struct *fsp)
1275 {
1276         struct posix_lock *entries = NULL;
1277         size_t count, i;
1278
1279         /*
1280          * Optimization for the common case where we are the only
1281          * opener of a file. If all fd entries are our own, we don't
1282          * need to explicitly release all the locks via the POSIX functions,
1283          * we can just remove all the entries in the tdb and allow the
1284          * close to remove the real locks.
1285          */
1286
1287         count = get_posix_lock_entries(fsp, &entries);
1288
1289         if (count == 0) {
1290                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1291                 return;
1292         }
1293
1294         for (i = 0; i < count; i++) {
1295                 if (entries[i].fd != fsp->fd )
1296                         break;
1297
1298                 dump_entry(&entries[i]);
1299         }
1300
1301         if (i == count) {
1302                 /* All locks are ours. */
1303                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n", 
1304                         fsp->fsp_name, (unsigned int)count ));
1305                 free((char *)entries);
1306                 delete_posix_lock_entries(fsp);
1307                 return;
1308         }
1309
1310         /*
1311          * Difficult case. We need to delete all our locks, whilst leaving
1312          * all other POSIX locks in place.
1313          */
1314
1315         for (i = 0; i < count; i++) {
1316                 struct posix_lock *pl = &entries[i];
1317                 if (pl->fd == fsp->fd)
1318                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1319         }
1320         free((char *)entries);
1321 }
1322
1323 /*******************************************************************
1324  Create the in-memory POSIX lock databases.
1325 ********************************************************************/
1326
1327 BOOL posix_locking_init(void)
1328 {
1329         if (posix_lock_tdb && posix_pending_close_tdb)
1330                 return True;
1331
1332         if (!posix_lock_tdb)
1333                 posix_lock_tdb = tdb_open(NULL, 0, TDB_INTERNAL,
1334                                           O_RDWR|O_CREAT, 0644);
1335     if (!posix_lock_tdb) {
1336         DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1337                 return False;
1338     }
1339         if (!posix_pending_close_tdb)
1340                 posix_pending_close_tdb = tdb_open(NULL, 0, TDB_INTERNAL,
1341                     O_RDWR|O_CREAT, 0644);
1342     if (!posix_pending_close_tdb) {
1343         DEBUG(0,("Failed to open POSIX pending close database.\n"));
1344                 return False;
1345     }
1346
1347         return True;
1348 }
1349
1350 /*******************************************************************
1351  Delete the in-memory POSIX lock databases.
1352 ********************************************************************/
1353
1354 BOOL posix_locking_end(void)
1355 {
1356     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1357                 return False;
1358     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1359                 return False;
1360         return True;
1361 }