Fix for stacking locks in brlock and POSIX. Windows only allows a read lock
[samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/Netbios implementation.
3    Version 3.0
4    Locking functions
5    Copyright (C) Jeremy Allison 1992-2000
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
21    Revision History:
22
23    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 */
25
26 #include "includes.h"
27 extern int DEBUGLEVEL;
28 extern int global_smbpid;
29
30 /*
31  * The POSIX locking database handle.
32  */
33
34 static TDB_CONTEXT *posix_lock_tdb;
35
36 /*
37  * The pending close database handle.
38  */
39
40 static TDB_CONTEXT *posix_pending_close_tdb;
41
42 /*
43  * The data in POSIX lock records is an unsorted linear array of these
44  * records.  It is unnecessary to store the count as tdb provides the
45  * size of the record.
46  */
47
48 struct posix_lock {
49         int fd;
50         SMB_OFF_T start;
51         SMB_OFF_T size;
52         int lock_type;
53 };
54
55 /*
56  * The data in POSIX pending close records is an unsorted linear array of int
57  * records.  It is unnecessary to store the count as tdb provides the
58  * size of the record.
59  */
60
61 /* The key used in both the POSIX databases. */
62
63 struct posix_lock_key {
64         SMB_DEV_T device;
65         SMB_INO_T inode;
66 }; 
67
68 /*******************************************************************
69  Form a static locking key for a dev/inode pair.
70 ******************************************************************/
71
72 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
73 {
74         static struct posix_lock_key key;
75         TDB_DATA kbuf;
76         key.device = dev;
77         key.inode = inode;
78         kbuf.dptr = (char *)&key;
79         kbuf.dsize = sizeof(key);
80         return kbuf;
81 }
82
83 /*******************************************************************
84  Convenience function to get a key from an fsp.
85 ******************************************************************/
86
87 static TDB_DATA locking_key_fsp(files_struct *fsp)
88 {
89         return locking_key(fsp->dev, fsp->inode);
90 }
91
92 /****************************************************************************
93  Add an fd to the pending close tdb.
94 ****************************************************************************/
95
96 static BOOL add_fd_to_close_entry(files_struct *fsp)
97 {
98         TDB_DATA kbuf = locking_key_fsp(fsp);
99         TDB_DATA dbuf;
100
101         dbuf.dptr = NULL;
102
103         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
104
105         dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
106         if (!dbuf.dptr) {
107                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
108                 return False;
109         }
110         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
111         dbuf.dsize += sizeof(int);
112
113         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
114                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
115         }
116
117         free(dbuf.dptr);
118         return True;
119 }
120
121 /****************************************************************************
122  Remove all fd entries for a specific dev/inode pair from the tdb.
123 ****************************************************************************/
124
125 static void delete_close_entries(files_struct *fsp)
126 {
127         TDB_DATA kbuf = locking_key_fsp(fsp);
128
129         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
130                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
131 }
132
133 /****************************************************************************
134  Get the array of POSIX pending close records for an open fsp. Caller must
135  free. Returns number of entries.
136 ****************************************************************************/
137
138 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
139 {
140         TDB_DATA kbuf = locking_key_fsp(fsp);
141         TDB_DATA dbuf;
142         size_t count = 0;
143
144         *entries = NULL;
145         dbuf.dptr = NULL;
146
147         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
148
149     if (!dbuf.dptr) {
150                 return 0;
151         }
152
153         *entries = (int *)dbuf.dptr;
154         count = (size_t)(dbuf.dsize / sizeof(int));
155
156         return count;
157 }
158
159 /****************************************************************************
160  Get the array of POSIX locks for an fsp. Caller must free. Returns
161  number of entries.
162 ****************************************************************************/
163
164 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
165 {
166         TDB_DATA kbuf = locking_key_fsp(fsp);
167         TDB_DATA dbuf;
168         size_t count = 0;
169
170         *entries = NULL;
171
172         dbuf.dptr = NULL;
173
174         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
175
176     if (!dbuf.dptr) {
177                 return 0;
178         }
179
180         *entries = (struct posix_lock *)dbuf.dptr;
181         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
182
183         return count;
184 }
185
186 /****************************************************************************
187  Deal with pending closes needed by POSIX locking support.
188 ****************************************************************************/
189
190 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
191 {
192         int saved_errno = 0;
193         int ret;
194         size_t count, i;
195         struct posix_lock *entries = NULL;
196         int *fd_array = NULL;
197
198         if (!lp_posix_locking(SNUM(conn))) {
199                 /*
200                  * No POSIX to worry about, just close.
201                  */
202                 ret = conn->vfs_ops.close(fsp->fd);
203                 fsp->fd = -1;
204                 return ret;
205         }
206
207         /*
208          * Get the number of outstanding POSIX locks on this dev/inode pair.
209          */
210
211         count = get_posix_lock_entries(fsp, &entries);
212         
213         if (count) {
214
215                 /*
216                  * There are outstanding locks on this dev/inode pair on other fds.
217                  * Add our fd to the pending close tdb and set fsp->fd to -1.
218                  */
219
220                 if (!add_fd_to_close_entry(fsp)) {
221                         free((char *)entries);
222                         return False;
223                 }
224
225                 free((char *)entries);
226                 fsp->fd = -1;
227                 return 0;
228         }
229
230         if(entries)
231                 free((char *)entries);
232
233         /*
234          * No outstanding POSIX locks. Get the pending close fd's
235          * from the tdb and close them all.
236          */
237
238         count = get_posix_pending_close_entries(fsp, &fd_array);
239
240         if (count) {
241                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
242
243                 for(i = 0; i < count; i++) {
244                         if (conn->vfs_ops.close(fd_array[i]) == -1) {
245                                 saved_errno = errno;
246                         }
247                 }
248
249                 /*
250                  * Delete all fd's stored in the tdb
251                  * for this dev/inode pair.
252                  */
253
254                 delete_close_entries(fsp);
255         }
256
257         if (fd_array)
258                 free((char *)fd_array);
259
260         /*
261          * Finally close the fd associated with this fsp.
262          */
263
264         ret = conn->vfs_ops.close(fsp->fd);
265
266         if (saved_errno != 0) {
267         errno = saved_errno;
268                 ret = -1;
269     } 
270
271         fsp->fd = -1;
272
273         return ret;
274 }
275
/****************************************************************************
 Debugging aid: printable name for a POSIX lock type. F_RDLCK gives "READ",
 every other value gives "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type != F_RDLCK) {
		return "WRITE";
	}

	return "READ";
}
284
285 /****************************************************************************
286  Add an entry into the POSIX locking tdb. Returns the number of records that
287  match the given start and size, or -1 on error.
288 ****************************************************************************/
289
290 static int add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type)
291 {
292         TDB_DATA kbuf = locking_key_fsp(fsp);
293         TDB_DATA dbuf;
294         struct posix_lock pl;
295         struct posix_lock *entries;
296         size_t i, count;
297         int num_records = 0;
298
299         /*
300          * Windows is very strange. It allows read locks to be overlayed on 
301          * a write lock, but leaves the write lock in force until the first
302          * unlock. It also reference counts the locks. This means the following sequence :
303          *
304          * process1                                      process2
305          * ------------------------------------------------------------------------
306          * WRITE LOCK : start = 0, len = 10
307          *                                            READ LOCK: start =0, len = 10 - FAIL
308          * READ LOCK : start = 0, len = 10
309          *                                            READ LOCK: start =0, len = 10 - FAIL
310          * UNLOCK : start = 0, len = 10
311          *                                            READ LOCK: start =0, len = 10 - OK
312          *
313          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
314          * would leave a single read lock over the 0-10 region. In order to
315          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
316          * entries, one for each overlayed lock request. We are guarenteed by the brlock
317          * semantics that if a write lock is added, then it will be first in the array.
318          */
319         
320         dbuf.dptr = NULL;
321
322         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
323
324         /*
325          * New record.
326          */
327
328         pl.fd = fsp->fd;
329         pl.start = start;
330         pl.size = size;
331         pl.lock_type = lock_type;
332
333         dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
334         if (!dbuf.dptr) {
335                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
336                 goto fail;
337         }
338
339         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
340         dbuf.dsize += sizeof(pl);
341
342         count = (size_t)(dbuf.dsize / sizeof(pl));
343         entries = (struct posix_lock *)dbuf.dptr;
344
345         for (i = 0; i < count; i++) {
346                 struct posix_lock *entry = &entries[i];
347
348                 if (fsp->fd == entry->fd &&
349                         start == entry->start &&
350                         size == entry->size)
351                         num_records++;
352
353         }
354
355         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
356                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
357                 goto fail;
358         }
359
360     free(dbuf.dptr);
361
362         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: num_records = %d : dev=%.0f inode=%.0f\n",
363                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size, num_records,
364                         (double)fsp->dev, (double)fsp->inode ));
365
366     return num_records;
367
368  fail:
369     if (dbuf.dptr)
370                 free(dbuf.dptr);
371     return -1;
372 }
373
374 /****************************************************************************
375  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
376  deleted and the number of remaining matching records, or -1 on error.
377 ****************************************************************************/
378
379 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
380 {
381         TDB_DATA kbuf = locking_key_fsp(fsp);
382         TDB_DATA dbuf;
383         struct posix_lock *locks;
384         size_t i, count;
385         int num_records = 0;
386
387         dbuf.dptr = NULL;
388         
389         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
390
391         if (!dbuf.dptr) {
392                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
393                 goto fail;
394         }
395
396         /* There are existing locks - find a match. */
397         locks = (struct posix_lock *)dbuf.dptr;
398         count = (size_t)(dbuf.dsize / sizeof(*locks));
399
400         /*
401          * Count the number of entries that match this
402          * unlock request.
403          */
404
405         for (i = 0; i < count; i++) {
406                 struct posix_lock *entry = &locks[i];
407
408                 if (entry->fd == fsp->fd &&
409                         entry->start == start &&
410                         entry->size == size) {
411                                 num_records++;
412                 }
413         }
414
415         for (i=0; i<count; i++) { 
416                 struct posix_lock *entry = &locks[i];
417
418                 if (entry->fd == fsp->fd &&
419                         entry->start == start &&
420                         entry->size == size) {
421
422                         num_records--; /* We're deleting one. */
423
424                         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
425                                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
426                                         (unsigned int)num_records ));
427
428                         /* Make a copy if requested. */
429                         if (pl)
430                                 *pl = *entry;
431
432                         /* Found it - delete it. */
433                         if (count == 1) {
434                                 tdb_delete(posix_lock_tdb, kbuf);
435                         } else {
436                                 if (i < count-1) {
437                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
438                                 }
439                                 dbuf.dsize -= sizeof(*locks);
440                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
441                         }
442
443                         free(dbuf.dptr);
444                         return num_records;
445                 }
446         }
447
448         /* We didn't find it. */
449
450  fail:
451     if (dbuf.dptr)
452                 free(dbuf.dptr);
453     return -1;
454 }
455
456 /****************************************************************************
457  Utility function to map a lock type correctly depending on the open
458  mode of a file.
459 ****************************************************************************/
460
461 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
462 {
463         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
464                 /*
465                  * Many UNIX's cannot get a write lock on a file opened read-only.
466                  * Win32 locking semantics allow this.
467                  * Do the best we can and attempt a read-only lock.
468                  */
469                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
470                 return F_RDLCK;
471         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
472                 /*
473                  * Ditto for read locks on write only files.
474                  */
475                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
476                 return F_WRLCK;
477         }
478
479   /*
480    * This return should be the most normal, as we attempt
481    * to always open files read/write.
482    */
483
484   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
485 }
486
487 /****************************************************************************
488  Check to see if the given unsigned lock range is within the possible POSIX
489  range. Modifies the given args to be in range if possible, just returns
490  False if not.
491 ****************************************************************************/
492
493 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
494                                                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
495 {
496         SMB_OFF_T offset;
497         SMB_OFF_T count;
498
499 #if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
500
501     SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
502     SMB_OFF_T mask = (mask2<<1);
503     SMB_OFF_T neg_mask = ~mask;
504
505         /*
506          * In this case SMB_OFF_T is 64 bits,
507          * and the underlying system can handle 64 bit signed locks.
508          * Cast to signed type.
509          */
510
511         offset = (SMB_OFF_T)u_offset;
512         count = (SMB_OFF_T)u_count;
513
514         /*
515          * Deal with a very common case of count of all ones.
516          * (lock entire file).
517          */
518
519         if(count == (SMB_OFF_T)-1)
520                 count &= ~mask;
521
522         /*
523          * POSIX lock ranges cannot be negative.
524          * Fail if any combination becomes negative.
525          */
526
527         if(offset < 0 || count < 0 || (offset + count < 0)) {
528                 DEBUG(10,("posix_lock_in_range: negative range: offset = %.0f, count = %.0f. Ignoring lock.\n",
529                                 (double)offset, (double)count ));
530                 return False;
531         }
532
533         /*
534          * In this case SMB_OFF_T is 64 bits, the offset and count
535          * fit within the positive range, and the underlying
536          * system can handle 64 bit locks. Just return as the
537          * cast values are ok.
538          */
539
540 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
541
542         /*
543          * In this case either SMB_OFF_T is 32 bits,
544          * or the underlying system cannot handle 64 bit signed locks.
545          * Either way we have to try and mangle to fit within 31 bits.
546          * This is difficult.
547          */
548
549 #if defined(HAVE_BROKEN_FCNTL64_LOCKS)
550
551         /*
552          * SMB_OFF_T is 64 bits, but we need to use 31 bits due to
553          * broken large locking.
554          */
555
556         /*
557          * Deal with a very common case of count of all ones.
558          * (lock entire file).
559          */
560
561         if(u_count == (SMB_BIG_UINT)-1)
562                 count = 0x7FFFFFFF;
563
564         if(((u_offset >> 32) & 0xFFFFFFFF) || ((u_count >> 32) & 0xFFFFFFFF)) {
565                 DEBUG(10,("posix_lock_in_range: top 32 bits not zero. offset = %.0f, count = %.0f. Ignoring lock.\n",
566                                 (double)u_offset, (double)u_count ));
567                 /* Top 32 bits of offset or count were not zero. */
568                 return False;
569         }
570
571         /* Cast from 64 bits unsigned to 64 bits signed. */
572         offset = (SMB_OFF_T)u_offset;
573         count = (SMB_OFF_T)u_count;
574
575         /*
576          * Check if we are within the 2^31 range.
577          */
578
579         {
580                 int32 low_offset = (int32)offset;
581                 int32 low_count = (int32)count;
582
583                 if(low_offset < 0 || low_count < 0 || (low_offset + low_count < 0)) {
584                         DEBUG(10,("posix_lock_in_range: not within 2^31 range. low_offset = %d, low_count = %d. Ignoring lock.\n",
585                                         low_offset, low_count ));
586                         return False;
587                 }
588         }
589
590         /*
591          * Ok - we can map from a 64 bit number to a 31 bit lock.
592          */
593
594 #else /* HAVE_BROKEN_FCNTL64_LOCKS */
595
596         /*
597          * SMB_OFF_T is 32 bits.
598          */
599
600 #if defined(HAVE_LONGLONG)
601
602         /*
603          * SMB_BIG_UINT is 64 bits, we can do a 32 bit shift.
604          */
605
606         /*
607          * Deal with a very common case of count of all ones.
608          * (lock entire file).
609          */
610
611         if(u_count == (SMB_BIG_UINT)-1)
612                 count = 0x7FFFFFFF;
613
614         if(((u_offset >> 32) & 0xFFFFFFFF) || ((u_count >> 32) & 0xFFFFFFFF)) {
615                 DEBUG(10,("posix_lock_in_range: top 32 bits not zero. u_offset = %.0f, u_count = %.0f. Ignoring lock.\n",
616                                 (double)u_offset, (double)u_count ));
617                 return False;
618         }
619
620         /* Cast from 64 bits unsigned to 32 bits signed. */
621         offset = (SMB_OFF_T)u_offset;
622         count = (SMB_OFF_T)u_count;
623
624         /*
625          * Check if we are within the 2^31 range.
626          */
627
628         if(offset < 0 || count < 0 || (offset + count < 0)) {
629                 DEBUG(10,("posix_lock_in_range: not within 2^31 range. offset = %d, count = %d. Ignoring lock.\n",
630                                 (int)offset, (int)count ));
631                 return False;
632         }
633
634 #else /* HAVE_LONGLONG */
635
636         /*
637          * SMB_BIG_UINT and SMB_OFF_T are both 32 bits,
638          * just cast.
639          */
640
641         /*
642          * Deal with a very common case of count of all ones.
643          * (lock entire file).
644          */
645
646         if(u_count == (SMB_BIG_UINT)-1)
647                 count = 0x7FFFFFFF;
648
649         /* Cast from 32 bits unsigned to 32 bits signed. */
650         offset = (SMB_OFF_T)u_offset;
651         count = (SMB_OFF_T)u_count;
652
653         /*
654          * Check if we are within the 2^31 range.
655          */
656
657         if(offset < 0 || count < 0 || (offset + count < 0)) {
658                 DEBUG(10,("posix_lock_in_range: not within 2^31 range. offset = %d, count = %d. Ignoring lock.\n",
659                                 (int)offset, (int)count ));
660                 return False;
661         }
662
663 #endif /* HAVE_LONGLONG */
664 #endif /* LARGE_SMB_OFF_T */
665 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
666
667         /*
668          * The mapping was successful.
669          */
670
671         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
672                         (double)offset, (double)count ));
673
674         *offset_out = offset;
675         *count_out = count;
676         
677         return True;
678 }
679
680 #if defined(LARGE_SMB_OFF_T)
681 /****************************************************************************
682  Pathetically try and map a 64 bit lock offset into 31 bits. I hate Windows :-).
683 ****************************************************************************/
684
685 static uint32 map_lock_offset(uint32 high, uint32 low)
686 {
687         unsigned int i;
688         uint32 mask = 0;
689         uint32 highcopy = high;
690
691         /*
692          * Try and find out how many significant bits there are in high.
693          */
694
695         for(i = 0; highcopy; i++)
696                 highcopy >>= 1;
697
698         /*
699          * We use 31 bits not 32 here as POSIX
700          * lock offsets may not be negative.
701          */
702
703         mask = (~0) << (31 - i);
704
705         if(low & mask)
706                 return 0; /* Fail. */
707
708         high <<= (31 - i);
709
710         return (high|low);
711 }
712 #endif
713
714 /****************************************************************************
715  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
716  broken NFS implementations.
717 ****************************************************************************/
718
719 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
720 {
721         int ret;
722         struct connection_struct *conn = fsp->conn;
723
724 #if defined(LARGE_SMB_OFF_T)
725         /*
726          * In the 64 bit locking case we store the original
727          * values in case we have to map to a 32 bit lock on
728          * a filesystem that doesn't support 64 bit locks.
729          */
730         SMB_OFF_T orig_offset = offset;
731         SMB_OFF_T orig_count = count;
732 #endif /* LARGE_SMB_OFF_T */
733
734         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
735
736         ret = conn->vfs_ops.lock(fsp->fd,op,offset,count,type);
737
738         if (!ret && (errno == EFBIG)) {
739                 if( DEBUGLVL( 0 )) {
740                         dbgtext("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n", (double)offset,(double)count);
741                         dbgtext("a 'file too large' error. This can happen when using 64 bit lock offsets\n");
742                         dbgtext("on 32 bit NFS mounted file systems. Retrying with 32 bit truncated length.\n");
743                 }
744                 /* 32 bit NFS file system, retry with smaller offset */
745                 errno = 0;
746                 count &= 0x7fffffff;
747                 ret = conn->vfs_ops.lock(fsp->fd,op,offset,count,type);
748         }
749
750         /* A lock query - just return. */
751         if (op == SMB_F_GETLK)
752                 return ret;
753
754         /* A lock set or unset. */
755         if (!ret) {
756                 DEBUG(3,("posix_fcntl_lock: lock failed at offset %.0f count %.0f op %d type %d (%s)\n",
757                                 (double)offset,(double)count,op,type,strerror(errno)));
758
759                 /* Perhaps it doesn't support this sort of locking ? */
760                 if (errno == EINVAL) {
761 #if defined(LARGE_SMB_OFF_T)
762                         {
763                                 /*
764                                  * Ok - if we get here then we have a 64 bit lock request
765                                  * that has returned EINVAL. Try and map to 31 bits for offset
766                                  * and length and try again. This may happen if a filesystem
767                                  * doesn't support 64 bit offsets (efs/ufs) although the underlying
768                                  * OS does.
769                                  */
770                                 uint32 off_low = (orig_offset & 0xFFFFFFFF);
771                                 uint32 off_high = ((orig_offset >> 32) & 0xFFFFFFFF);
772
773                                 count = (orig_count & 0x7FFFFFFF);
774                                 offset = (SMB_OFF_T)map_lock_offset(off_high, off_low);
775                                 ret = conn->vfs_ops.lock(fsp->fd,op,offset,count,type);
776                                 if (!ret) {
777                                         if (errno == EINVAL) {
778                                                 DEBUG(3,("posix_fcntl_lock: locking not supported? returning True\n"));
779                                                 return(True);
780                                         }
781                                         return False;
782                                 }
783                                 DEBUG(3,("posix_fcntl_lock: 64 -> 32 bit modified lock call successful\n"));
784                                 return True;
785                         }
786 #else /* LARGE_SMB_OFF_T */
787                         DEBUG(3,("locking not supported? returning True\n"));
788                         return(True);
789 #endif /* LARGE_SMB_OFF_T */
790                 }
791
792                 return(False);
793         }
794
795         DEBUG(8,("posix_fcntl_lock: Lock call successful\n"));
796
797         return(True);
798 }
799
800 /****************************************************************************
801  POSIX function to see if a file region is locked. Returns True if the
802  region is locked, False otherwise.
803 ****************************************************************************/
804
805 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
806 {
807         SMB_OFF_T offset;
808         SMB_OFF_T count;
809         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
810
811         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
812                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
813
814         /*
815          * If the requested lock won't fit in the POSIX range, we will
816          * never set it, so presume it is not locked.
817          */
818
819         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
820                 return False;
821
822         /*
823          * Note that most UNIX's can *test* for a write lock on
824          * a read-only fd, just not *set* a write lock on a read-only
825          * fd. So we don't need to use map_lock_type here.
826          */ 
827
828         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
829 }
830
831 /****************************************************************************
832  POSIX function to acquire a lock. Returns True if the
833  lock could be granted, False if not.
834 ****************************************************************************/
835
836 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
837 {
838         SMB_OFF_T offset;
839         SMB_OFF_T count;
840         BOOL ret = True;
841         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
842         int ref_count;
843
844         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
845                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
846
847         /*
848          * If the requested lock won't fit in the POSIX range, we will
849          * pretend it was successful.
850          */
851
852         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
853                 return True;
854
855         /*
856          * Note that setting multiple overlapping locks on different
857          * file descriptors will not be held separately by the kernel (POSIX
858          * braindamage), but will be merged into one continuous lock
859          * range. We cope with this case in the release_posix_lock code
860          * below. We need to add the posix lock entry into the tdb before
861          * doing the real posix lock call to deal with the locking overlay
862          * case described above in add_posix_lock_entry().
863          */
864
865         ref_count = add_posix_lock_entry(fsp,offset,count,posix_lock_type);
866
867         if (ref_count == 1) {
868                 /*
869                  * First lock entry created. Do a real POSIX lock.
870                  */
871             ret = posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type);
872
873                 /*
874                  * Oops, POSIX lock failed, delete the tdb entry.
875                  */
876                 if (!ret)
877                         delete_posix_lock_entry(fsp,offset,count,NULL);
878         }
879
880         return ret;
881 }
882
883 /*
884  * Structure used when splitting a lock range
885  * into a POSIX lock range. Doubly linked list.
886  */
887
888 struct unlock_list {
889     struct unlock_list *next;
890     struct unlock_list *prev;
891     SMB_OFF_T start;
892     SMB_OFF_T size;
893 };
894
895 /****************************************************************************
896  Create a list of lock ranges that don't overlap a given range. Used in calculating
897  POSIX lock unlocks. This is a difficult function that requires ASCII art to
898  understand it :-).
899 ****************************************************************************/
900
901 static struct unlock_list *posix_unlock_list(TALLOC_CTX *ctx, struct unlock_list *ulhead, files_struct *fsp)
902 {
903         TDB_DATA kbuf = locking_key_fsp(fsp);
904         TDB_DATA dbuf;
905         struct posix_lock *locks;
906         size_t num_locks, i;
907
908         dbuf.dptr = NULL;
909
910         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
911
912         if (!dbuf.dptr) {
913                 return ulhead;
914         }
915         
916         locks = (struct posix_lock *)dbuf.dptr;
917         num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
918
919         /*
920          * Check the current lock list on this dev/inode pair.
921          * Quit if the list is deleted.
922          */
923
924         DEBUG(10,("posix_unlock_list: curr: start=%.0f,size=%.0f\n",
925                 (double)ulhead->start, (double)ulhead->size ));
926
927         for (i=0; i<num_locks && ulhead; i++) {
928
929                 struct posix_lock *lock = &locks[i];
930                 struct unlock_list *ul_curr;
931
932                 /*
933                  * Walk the unlock list, checking for overlaps. Note that
934                  * the unlock list can expand within this loop if the current
935                  * range being examined needs to be split.
936                  */
937
938                 for (ul_curr = ulhead; ul_curr;) {
939
940                         DEBUG(10,("posix_unlock_list: lock: start=%.0f,size=%.0f:",
941                                 (double)lock->start, (double)lock->size ));
942
943                         if ( (ul_curr->start >= (lock->start + lock->size)) ||
944                                  (lock->start > (ul_curr->start + ul_curr->size))) {
945
946                                 /* No overlap with this lock - leave this range alone. */
947 /*********************************************
948                                              +---------+
949                                              | ul_curr |
950                                              +---------+
951                                 +-------+
952                                 | lock  |
953                                 +-------+
954 OR....
955              +---------+
956              | ul_curr |
957              +---------+
958 **********************************************/
959
960                                 DEBUG(10,("no overlap case.\n" ));
961
962                                 ul_curr = ul_curr->next;
963
964                         } else if ( (ul_curr->start >= lock->start) &&
965                                                 (ul_curr->start + ul_curr->size <= lock->start + lock->size) ) {
966
967                                 /*
968                                  * This unlock is completely overlapped by this existing lock range
969                                  * and thus should have no effect (not be unlocked). Delete it from the list.
970                                  */
971 /*********************************************
972                 +---------+
973                 | ul_curr |
974                 +---------+
975         +---------------------------+
976         |       lock                |
977         +---------------------------+
978 **********************************************/
979                                 /* Save the next pointer */
980                                 struct unlock_list *ul_next = ul_curr->next;
981
982                                 DEBUG(10,("delete case.\n" ));
983
984                                 DLIST_REMOVE(ulhead, ul_curr);
985                                 if(ulhead == NULL)
986                                         break; /* No more list... */
987
988                                 ul_curr = ul_next;
989                                 
990                         } else if ( (ul_curr->start >= lock->start) &&
991                                                 (ul_curr->start < lock->start + lock->size) &&
992                                                 (ul_curr->start + ul_curr->size > lock->start + lock->size) ) {
993
994                                 /*
995                                  * This unlock overlaps the existing lock range at the high end.
996                                  * Truncate by moving start to existing range end and reducing size.
997                                  */
998 /*********************************************
999                 +---------------+
1000                 | ul_curr       |
1001                 +---------------+
1002         +---------------+
1003         |    lock       |
1004         +---------------+
1005 BECOMES....
1006                         +-------+
1007                         |ul_curr|
1008                         +-------+
1009 **********************************************/
1010
1011                                 ul_curr->size = (ul_curr->start + ul_curr->size) - (lock->start + lock->size);
1012                                 ul_curr->start = lock->start + lock->size;
1013
1014                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
1015                                                                 (double)ul_curr->start, (double)ul_curr->size ));
1016
1017                                 ul_curr = ul_curr->next;
1018
1019                         } else if ( (ul_curr->start < lock->start) &&
1020                                                 (ul_curr->start + ul_curr->size > lock->start) ) {
1021
1022                                 /*
1023                                  * This unlock overlaps the existing lock range at the low end.
1024                                  * Truncate by reducing size.
1025                                  */
1026 /*********************************************
1027    +---------------+
1028    | ul_curr       |
1029    +---------------+
1030            +---------------+
1031            |    lock       |
1032            +---------------+
1033 BECOMES....
1034    +-------+
1035    |ul_curr|
1036    +-------+
1037 **********************************************/
1038
1039                                 ul_curr->size = lock->start - ul_curr->start;
1040
1041                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
1042                                                                 (double)ul_curr->start, (double)ul_curr->size ));
1043
1044                                 ul_curr = ul_curr->next;
1045                 
1046                         } else if ( (ul_curr->start < lock->start) &&
1047                                                 (ul_curr->start + ul_curr->size > lock->start + lock->size) ) {
1048                                 /*
1049                                  * Worst case scenario. Unlock request completely overlaps an existing
1050                                  * lock range. Split the request into two, push the new (upper) request
1051                                  * into the dlink list, and continue with the entry after ul_new (as we
1052                                  * know that ul_new will not overlap with this lock).
1053                                  */
1054 /*********************************************
1055         +---------------------------+
1056         |       ul_curr             |
1057         +---------------------------+
1058                 +---------+
1059                 | lock    |
1060                 +---------+
1061 BECOMES.....
1062         +-------+         +---------+
1063         |ul_curr|         |ul_new   |
1064         +-------+         +---------+
1065 **********************************************/
1066                                 struct unlock_list *ul_new = (struct unlock_list *)talloc(ctx,
1067                                                                                                         sizeof(struct unlock_list));
1068
1069                                 if(ul_new == NULL) {
1070                                         DEBUG(0,("posix_unlock_list: talloc fail.\n"));
1071                                         return NULL; /* The talloc_destroy takes care of cleanup. */
1072                                 }
1073
1074                                 ZERO_STRUCTP(ul_new);
1075                                 ul_new->start = lock->start + lock->size;
1076                                 ul_new->size = ul_curr->start + ul_curr->size - ul_new->start;
1077
1078                                 /* Add into the dlink list after the ul_curr point - NOT at ulhead. */
1079                                 DLIST_ADD(ul_curr, ul_new);
1080
1081                                 /* Truncate the ul_curr. */
1082                                 ul_curr->size = lock->start - ul_curr->start;
1083
1084                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
1085 new: start=%.0f,size=%.0f\n", (double)ul_curr->start, (double)ul_curr->size,
1086                                                                 (double)ul_new->start, (double)ul_new->size ));
1087
1088                                 ul_curr = ul_new->next;
1089
1090                         } else {
1091
1092                                 /*
1093                                  * This logic case should never happen. Ensure this is the
1094                                  * case by forcing an abort.... Remove in production.
1095                                  */
1096
1097                                 smb_panic("logic flaw in cases...\n");
1098                         }
1099                 } /* end for ( ul_curr = ulhead; ul_curr;) */
1100         } /* end for (i=0; i<num_locks && ul_head; i++) */
1101
1102         if (dbuf.dptr)
1103                 free(dbuf.dptr);
1104         
1105         return ulhead;
1106 }
1107
1108 /****************************************************************************
1109  POSIX function to release a lock. Returns True if the
1110  lock could be released, False if not.
1111 ****************************************************************************/
1112
1113 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1114 {
1115         SMB_OFF_T offset;
1116         SMB_OFF_T count;
1117         BOOL ret = True;
1118         TALLOC_CTX *ul_ctx = NULL;
1119         struct unlock_list *ulist = NULL;
1120         struct unlock_list *ul = NULL;
1121         struct posix_lock deleted_lock;
1122         int num_entries;
1123
1124         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1125                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1126
1127         /*
1128          * If the requested lock won't fit in the POSIX range, we will
1129          * pretend it was successful.
1130          */
1131
1132         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1133                 return True;
1134
1135         /*
1136          * We treat this as one unlock request for POSIX accounting purposes even
1137          * if it may have been split into multiple smaller POSIX unlock ranges.
1138          */ 
1139
1140         num_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1141
1142         if (num_entries == -1) {
1143         smb_panic("release_posix_lock: unable find entry to delete !\n");
1144         }
1145
1146         /*
1147          * If num_entries is > 0, and the lock_type we just deleted from the tdb was
1148          * a POSIX write lock, then rather than doing an unlock we need to downgrade
1149          * the POSIX lock to a read lock.
1150          */
1151
1152         if (num_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1153                 return posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK);
1154         }
1155
1156         /*
1157          * Only do the POSIX unlock when the num_entries is now zero.
1158          */
1159
1160         if (num_entries > 0) {
1161                 DEBUG(10, ("release_posix_lock: num_entries = %d\n", num_entries ));
1162                 return True;
1163         }
1164
1165         if ((ul_ctx = talloc_init()) == NULL) {
1166         DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1167                 return True; /* Not a fatal error. */
1168         }
1169
1170         if ((ul = (struct unlock_list *)talloc(ul_ctx, sizeof(struct unlock_list))) == NULL) {
1171                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1172                 talloc_destroy(ul_ctx);
1173                 return True; /* Not a fatal error. */
1174         }
1175
1176         /*
1177          * Create the initial list entry containing the
1178          * lock we want to remove.
1179          */
1180
1181         ZERO_STRUCTP(ul);
1182         ul->start = offset;
1183         ul->size = count;
1184
1185         DLIST_ADD(ulist, ul);
1186
1187         /*
1188          * The following call calculates if there are any
1189          * overlapping locks held by this process on
1190          * fd's open on the same file and creates a
1191          * list of unlock ranges that will allow
1192          * POSIX lock ranges to remain on the file whilst the
1193          * unlocks are performed.
1194          */
1195
1196         ulist = posix_unlock_list(ul_ctx, ulist, fsp);
1197
1198         /*
1199          * Release the POSIX locks on the list of ranges returned.
1200          */
1201
1202         for(; ulist; ulist = ulist->next) {
1203                 offset = ulist->start;
1204                 count = ulist->size;
1205
1206                 if(u_count == 0) {
1207
1208                         /*
1209                          * This lock must overlap with an existing lock.
1210                          * Don't do any POSIX call.
1211                          */
1212
1213                         continue;
1214                 }
1215
1216                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1217                         (double)offset, (double)count ));
1218
1219                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1220                         ret = False;
1221         }
1222
1223         talloc_destroy(ul_ctx);
1224
1225         return ret;
1226 }
1227
1228 /****************************************************************************
1229  Remove all lock entries for a specific dev/inode pair from the tdb.
1230 ****************************************************************************/
1231
1232 static void delete_posix_lock_entries(files_struct *fsp)
1233 {
1234         TDB_DATA kbuf = locking_key_fsp(fsp);
1235
1236         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1237                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1238 }
1239
1240 /****************************************************************************
1241  Debug function.
1242 ****************************************************************************/
1243
1244 static void dump_entry(struct posix_lock *pl)
1245 {
1246         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1247                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1248 }
1249
1250 /****************************************************************************
1251  Remove any locks on this fd. Called from file_close().
1252 ****************************************************************************/
1253
1254 void posix_locking_close_file(files_struct *fsp)
1255 {
1256         struct posix_lock *entries = NULL;
1257         size_t count, i;
1258
1259         /*
1260          * Optimization for the common case where we are the only
1261          * opener of a file. If all fd entries are our own, we don't
1262          * need to explicitly release all the locks via the POSIX functions,
1263          * we can just remove all the entries in the tdb and allow the
1264          * close to remove the real locks.
1265          */
1266
1267         count = get_posix_lock_entries(fsp, &entries);
1268
1269         if (count == 0) {
1270                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1271                 return;
1272         }
1273
1274         for (i = 0; i < count; i++) {
1275                 if (entries[i].fd != fsp->fd )
1276                         break;
1277
1278                 dump_entry(&entries[i]);
1279         }
1280
1281         if (i == count) {
1282                 /* All locks are ours. */
1283                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n", 
1284                         fsp->fsp_name, (unsigned int)count ));
1285                 free((char *)entries);
1286                 delete_posix_lock_entries(fsp);
1287                 return;
1288         }
1289
1290         /*
1291          * Difficult case. We need to delete all our locks, whilst leaving
1292          * all other POSIX locks in place.
1293          */
1294
1295         for (i = 0; i < count; i++) {
1296                 struct posix_lock *pl = &entries[i];
1297                 release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1298         }
1299         free((char *)entries);
1300 }
1301
1302 /*******************************************************************
1303  Create the in-memory POSIX lock databases.
1304 ********************************************************************/
1305
1306 BOOL posix_locking_init(void)
1307 {
1308         if (posix_lock_tdb && posix_pending_close_tdb)
1309                 return True;
1310
1311         if (!posix_lock_tdb)
1312                 posix_lock_tdb = tdb_open(NULL, 0, TDB_INTERNAL,
1313                                           O_RDWR|O_CREAT, 0644);
1314     if (!posix_lock_tdb) {
1315         DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1316                 return False;
1317     }
1318         if (!posix_pending_close_tdb)
1319                 posix_pending_close_tdb = tdb_open(NULL, 0, TDB_INTERNAL,
1320                     O_RDWR|O_CREAT, 0644);
1321     if (!posix_pending_close_tdb) {
1322         DEBUG(0,("Failed to open POSIX pending close database.\n"));
1323                 return False;
1324     }
1325
1326         return True;
1327 }
1328
1329 /*******************************************************************
1330  Delete the in-memory POSIX lock databases.
1331 ********************************************************************/
1332
1333 BOOL posix_locking_end(void)
1334 {
1335     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1336                 return False;
1337     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1338                 return False;
1339         return True;
1340 }