NFS v2 can return ENOLCK when greater than 31 bit offsets are used.
[ira/wip.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/Netbios implementation.
3    Version 3.0
4    Locking functions
5    Copyright (C) Jeremy Allison 1992-2000
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
21    Revision History:
22
23    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 */
25
26 #include "includes.h"
27 extern int DEBUGLEVEL;
28
29 /*
30  * The POSIX locking database handle.
31  */
32
33 static TDB_CONTEXT *posix_lock_tdb; /* Each record is an array of struct posix_lock, keyed by dev/inode (see locking_key()). */
34
35 /*
36  * The pending close database handle.
37  */
38
39 static TDB_CONTEXT *posix_pending_close_tdb; /* Each record is an array of int fds whose close was deferred, keyed by dev/inode. */
40
41 /*
42  * The data in POSIX lock records is an unsorted linear array of these
43  * records.  It is unnecessary to store the count as tdb provides the
44  * size of the record.
45  */
46
47 struct posix_lock {
48         int fd; /* fd the lock was taken on (copied from fsp->fd) */
49         SMB_OFF_T start; /* first offset of the locked range */
50         SMB_OFF_T size; /* length of the locked range */
51         int lock_type; /* POSIX lock type, F_RDLCK or F_WRLCK */
52 };
53
54 /*
55  * The data in POSIX pending close records is an unsorted linear array of int
56  * records.  It is unnecessary to store the count as tdb provides the
57  * size of the record.
58  */
59
60 /* The key used in both the POSIX databases. */
61
62 struct posix_lock_key {
63         SMB_DEV_T device; /* device number of the file (fsp->dev) */
64         SMB_INO_T inode; /* inode number of the file (fsp->inode) */
65 }; 
66
67 /*******************************************************************
68  Form a static locking key for a dev/inode pair.
69 ******************************************************************/
70
71 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
72 {
73         static struct posix_lock_key key;
74         TDB_DATA kbuf;
75
76         memset(&key, '\0', sizeof(key));
77         key.device = dev;
78         key.inode = inode;
79         kbuf.dptr = (char *)&key;
80         kbuf.dsize = sizeof(key);
81         return kbuf;
82 }
83
84 /*******************************************************************
85  Convenience function to get a key from an fsp.
86 ******************************************************************/
87
88 static TDB_DATA locking_key_fsp(files_struct *fsp)
89 {
90         return locking_key(fsp->dev, fsp->inode);
91 }
92
93 /****************************************************************************
94  Add an fd to the pending close tdb.
95 ****************************************************************************/
96
97 static BOOL add_fd_to_close_entry(files_struct *fsp)
98 {
99         TDB_DATA kbuf = locking_key_fsp(fsp);
100         TDB_DATA dbuf;
101         char *tp;
102
103         dbuf.dptr = NULL;
104
105         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
106
107         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
108         if (!tp) {
109                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
110                 if (dbuf.dptr)
111                         free(dbuf.dptr);
112                 return False;
113         } else
114                 dbuf.dptr = tp;
115
116         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
117         dbuf.dsize += sizeof(int);
118
119         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
120                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
121         }
122
123         free(dbuf.dptr);
124         return True;
125 }
126
127 /****************************************************************************
128  Remove all fd entries for a specific dev/inode pair from the tdb.
129 ****************************************************************************/
130
131 static void delete_close_entries(files_struct *fsp)
132 {
133         TDB_DATA kbuf = locking_key_fsp(fsp);
134
135         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
136                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
137 }
138
139 /****************************************************************************
140  Get the array of POSIX pending close records for an open fsp. Caller must
141  free. Returns number of entries.
142 ****************************************************************************/
143
144 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
145 {
146         TDB_DATA kbuf = locking_key_fsp(fsp);
147         TDB_DATA dbuf;
148         size_t count = 0;
149
150         *entries = NULL;
151         dbuf.dptr = NULL;
152
153         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
154
155     if (!dbuf.dptr) {
156                 return 0;
157         }
158
159         *entries = (int *)dbuf.dptr;
160         count = (size_t)(dbuf.dsize / sizeof(int));
161
162         return count;
163 }
164
165 /****************************************************************************
166  Get the array of POSIX locks for an fsp. Caller must free. Returns
167  number of entries.
168 ****************************************************************************/
169
170 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
171 {
172         TDB_DATA kbuf = locking_key_fsp(fsp);
173         TDB_DATA dbuf;
174         size_t count = 0;
175
176         *entries = NULL;
177
178         dbuf.dptr = NULL;
179
180         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
181
182     if (!dbuf.dptr) {
183                 return 0;
184         }
185
186         *entries = (struct posix_lock *)dbuf.dptr;
187         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
188
189         return count;
190 }
191
192 /****************************************************************************
193  Deal with pending closes needed by POSIX locking support.
194  Note that posix_locking_close_file() is expected to have been called
195  to delete all locks on this fsp before this function is called.
196 ****************************************************************************/
197
198 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
199 {
200         int saved_errno = 0;
201         int ret;
202         size_t count, i;
203         struct posix_lock *entries = NULL;
204         int *fd_array = NULL;
205         BOOL locks_on_other_fds = False;
206
207         if (!lp_posix_locking(SNUM(conn))) {
208                 /*
209                  * No POSIX to worry about, just close.
210                  */
211                 ret = conn->vfs_ops.close(fsp,fsp->fd);
212                 fsp->fd = -1;
213                 return ret;
214         }
215
216         /*
217          * Get the number of outstanding POSIX locks on this dev/inode pair.
218          */
219
220         count = get_posix_lock_entries(fsp, &entries);
221
222         /*
223          * Check if there are any outstanding locks belonging to
224          * other fd's. This should never be the case if posix_locking_close_file()
225          * has been called first, but it never hurts to be *sure*.
226          */
227
228         for (i = 0; i < count; i++) {
229                 if (entries[i].fd != fsp->fd) {
230                         locks_on_other_fds = True;
231                         break;
232                 }
233         }
234
235         if (locks_on_other_fds) {
236
237                 /*
238                  * There are outstanding locks on this dev/inode pair on other fds.
239                  * Add our fd to the pending close tdb and set fsp->fd to -1.
240                  */
241
242                 if (!add_fd_to_close_entry(fsp)) {
243                         free((char *)entries);
244                         return False;
245                 }
246
247                 free((char *)entries);
248                 fsp->fd = -1;
249                 return 0;
250         }
251
252         if(entries)
253                 free((char *)entries);
254
255         /*
256          * No outstanding POSIX locks. Get the pending close fd's
257          * from the tdb and close them all.
258          */
259
260         count = get_posix_pending_close_entries(fsp, &fd_array);
261
262         if (count) {
263                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
264
265                 for(i = 0; i < count; i++) {
266                         if (conn->vfs_ops.close(fsp,fd_array[i]) == -1) {
267                                 saved_errno = errno;
268                         }
269                 }
270
271                 /*
272                  * Delete all fd's stored in the tdb
273                  * for this dev/inode pair.
274                  */
275
276                 delete_close_entries(fsp);
277         }
278
279         if (fd_array)
280                 free((char *)fd_array);
281
282         /*
283          * Finally close the fd associated with this fsp.
284          */
285
286         ret = conn->vfs_ops.close(fsp,fsp->fd);
287
288         if (saved_errno != 0) {
289         errno = saved_errno;
290                 ret = -1;
291     } 
292
293         fsp->fd = -1;
294
295         return ret;
296 }
297
/****************************************************************************
 Debugging aid: printable name for a POSIX lock type. F_RDLCK maps to
 "READ"; every other value is reported as "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK)
		return "READ";

	return "WRITE";
}
306
307 /****************************************************************************
308  Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
309  then the POSIX fcntl lock fails.
310 ****************************************************************************/
311
312 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
313 {
314         TDB_DATA kbuf = locking_key_fsp(fsp);
315         TDB_DATA dbuf;
316         struct posix_lock *locks;
317         size_t count;
318
319         dbuf.dptr = NULL;
320         
321         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
322
323         if (!dbuf.dptr) {
324                 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
325                 goto fail;
326         }
327
328         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
329         locks = (struct posix_lock *)dbuf.dptr;
330
331         if (count == 1) {
332                 tdb_delete(posix_lock_tdb, kbuf);
333         } else {
334                 if (entry < count-1) {
335                         memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
336                 }
337                 dbuf.dsize -= sizeof(*locks);
338                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
339         }
340
341         free(dbuf.dptr);
342
343         return True;
344
345  fail:
346     if (dbuf.dptr)
347                 free(dbuf.dptr);
348     return False;
349 }
350
351 /****************************************************************************
352  Add an entry into the POSIX locking tdb. We return the index number of the
353  added lock (used in case we need to delete *exactly* this entry). Returns
354  False on fail, True on success.
355 ****************************************************************************/
356
357 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
358 {
359         TDB_DATA kbuf = locking_key_fsp(fsp);
360         TDB_DATA dbuf;
361         struct posix_lock pl;
362         char *tp;
363
364         dbuf.dptr = NULL;
365
366         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
367
368         *pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
369
370         /*
371          * Add new record.
372          */
373
374         pl.fd = fsp->fd;
375         pl.start = start;
376         pl.size = size;
377         pl.lock_type = lock_type;
378
379         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
380         if (!tp) {
381                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
382                 goto fail;
383         } else
384                 dbuf.dptr = tp;
385
386         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
387         dbuf.dsize += sizeof(pl);
388
389         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
390                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
391                 goto fail;
392         }
393
394     free(dbuf.dptr);
395
396         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
397                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
398                         (double)fsp->dev, (double)fsp->inode ));
399
400     return True;
401
402  fail:
403     if (dbuf.dptr)
404                 free(dbuf.dptr);
405     return False;
406 }
407
408 /****************************************************************************
409  Calculate if locks have any overlap at all.
410 ****************************************************************************/
411
412 static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
413 {
414         if (start1 >= start2 && start1 <= start2 + size2)
415                 return True;
416
417         if (start1 < start2 && start1 + size1 > start2)
418                 return True;
419
420         return False;
421 }
422
423 /****************************************************************************
424  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
425  deleted and the number of records that are overlapped by this one, or -1 on error.
426 ****************************************************************************/
427
428 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
429 {
430         TDB_DATA kbuf = locking_key_fsp(fsp);
431         TDB_DATA dbuf;
432         struct posix_lock *locks;
433         size_t i, count;
434         BOOL found = False;
435         int num_overlapping_records = 0;
436
437         dbuf.dptr = NULL;
438         
439         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
440
441         if (!dbuf.dptr) {
442                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
443                 goto fail;
444         }
445
446         /* There are existing locks - find a match. */
447         locks = (struct posix_lock *)dbuf.dptr;
448         count = (size_t)(dbuf.dsize / sizeof(*locks));
449
450         /*
451          * Search for and delete the first record that matches the
452          * unlock criteria.
453          */
454
455         for (i=0; i<count; i++) { 
456                 struct posix_lock *entry = &locks[i];
457
458                 if (entry->fd == fsp->fd &&
459                         entry->start == start &&
460                         entry->size == size) {
461
462                         /* Make a copy if requested. */
463                         if (pl)
464                                 *pl = *entry;
465
466                         /* Found it - delete it. */
467                         if (count == 1) {
468                                 tdb_delete(posix_lock_tdb, kbuf);
469                         } else {
470                                 if (i < count-1) {
471                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
472                                 }
473                                 dbuf.dsize -= sizeof(*locks);
474                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
475                         }
476                         count--;
477                         found = True;
478                         break;
479                 }
480         }
481
482         if (!found)
483                 goto fail;
484
485         /*
486          * Count the number of entries that are
487          * overlapped by this unlock request.
488          */
489
490         for (i = 0; i < count; i++) {
491                 struct posix_lock *entry = &locks[i];
492
493                 if (fsp->fd == entry->fd &&
494                         does_lock_overlap( start, size, entry->start, entry->size))
495                                 num_overlapping_records++;
496         }
497
498         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
499                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
500                                 (unsigned int)num_overlapping_records ));
501
502     if (dbuf.dptr)
503                 free(dbuf.dptr);
504
505         return num_overlapping_records;
506
507  fail:
508     if (dbuf.dptr)
509                 free(dbuf.dptr);
510     return -1;
511 }
512
513 /****************************************************************************
514  Utility function to map a lock type correctly depending on the open
515  mode of a file.
516 ****************************************************************************/
517
518 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
519 {
520         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
521                 /*
522                  * Many UNIX's cannot get a write lock on a file opened read-only.
523                  * Win32 locking semantics allow this.
524                  * Do the best we can and attempt a read-only lock.
525                  */
526                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
527                 return F_RDLCK;
528         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
529                 /*
530                  * Ditto for read locks on write only files.
531                  */
532                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
533                 return F_WRLCK;
534         }
535
536   /*
537    * This return should be the most normal, as we attempt
538    * to always open files read/write.
539    */
540
541   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
542 }
543
544 /****************************************************************************
545  Check to see if the given unsigned lock range is within the possible POSIX
546  range. Modifies the given args to be in range if possible, just returns
547  False if not.
548 ****************************************************************************/
549
550 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
551                                                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
552 {
553         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
554         SMB_OFF_T count = (SMB_OFF_T)u_count;
555
556         /*
557          * For the type of system we are, attempt to
558          * find the maximum positive lock offset as an SMB_OFF_T.
559          */
560
561 #if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
562
563         /*
564          * In this case SMB_OFF_T is 64 bits,
565          * and the underlying system can handle 64 bit signed locks.
566          */
567
568     SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
569     SMB_OFF_T mask = (mask2<<1);
570     SMB_OFF_T max_positive_lock_offset = ~mask;
571
572 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
573
574         /*
575          * In this case either SMB_OFF_T is 32 bits,
576          * or the underlying system cannot handle 64 bit signed locks.
577          * All offsets & counts must be 2^31 or less.
578          */
579
580     SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
581
582 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
583
584         /*
585          * POSIX locks of length zero mean lock to end-of-file.
586          * Win32 locks of length zero are point probes. Ignore
587          * any Win32 locks of length zero. JRA.
588          */
589
590         if (count == (SMB_OFF_T)0) {
591                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
592                 return False;
593         }
594
595         /*
596          * If the given offset was > max_positive_lock_offset then we cannot map this at all
597          * ignore this lock.
598          */
599
600         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
601                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
602                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
603                 return False;
604         }
605
606         /*
607          * We must truncate the offset and count to less than max_positive_lock_offset.
608          */
609
610         offset &= max_positive_lock_offset;
611         count &= max_positive_lock_offset;
612
613
614         /*
615          * Deal with a very common case of count of all ones.
616          * (lock entire file).
617          */
618
619         if(count == (SMB_OFF_T)-1)
620                 count = max_positive_lock_offset;
621
622         /*
623          * Truncate count to end at max lock offset.
624          */
625
626         if (offset + count < 0 || offset + count > max_positive_lock_offset)
627                 count = max_positive_lock_offset - offset;
628
629         /*
630          * If we ate all the count, ignore this lock.
631          */
632
633         if (count == 0) {
634                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
635                                 (double)u_offset, (double)u_count ));
636                 return False;
637         }
638
639         /*
640          * The mapping was successful.
641          */
642
643         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
644                         (double)offset, (double)count ));
645
646         *offset_out = offset;
647         *count_out = count;
648         
649         return True;
650 }
651
652 /****************************************************************************
653  Pathetically try and map a 64 bit lock offset into 31 bits. I hate Windows :-).
654 ****************************************************************************/
655
656 uint32 map_lock_offset(uint32 high, uint32 low)
657 {
658         unsigned int i;
659         uint32 mask = 0;
660         uint32 highcopy = high;
661
662         /*
663          * Try and find out how many significant bits there are in high.
664          */
665
666         for(i = 0; highcopy; i++)
667                 highcopy >>= 1;
668
669         /*
670          * We use 31 bits not 32 here as POSIX
671          * lock offsets may not be negative.
672          */
673
674         mask = (~0) << (31 - i);
675
676         if(low & mask)
677                 return 0; /* Fail. */
678
679         high <<= (31 - i);
680
681         return (high|low);
682 }
683
684 /****************************************************************************
685  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
686  broken NFS implementations.
687 ****************************************************************************/
688
689 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
690 {
691         int ret;
692         struct connection_struct *conn = fsp->conn;
693
694 #if defined(LARGE_SMB_OFF_T)
695         /*
696          * In the 64 bit locking case we store the original
697          * values in case we have to map to a 32 bit lock on
698          * a filesystem that doesn't support 64 bit locks.
699          */
700         SMB_OFF_T orig_offset = offset;
701         SMB_OFF_T orig_count = count;
702 #endif /* LARGE_SMB_OFF_T */
703
704         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
705
706         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
707
708         if (!ret && ((errno == EFBIG) || (errno == ENOLCK))) {
709                 if( DEBUGLVL( 0 )) {
710                         dbgtext("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n", (double)offset,(double)count);
711                         dbgtext("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno));
712                         dbgtext("on 32 bit NFS mounted file systems. Retrying with 32 bit truncated length.\n");
713                 }
714                 /* 32 bit NFS file system, retry with smaller offset */
715                 errno = 0;
716                 count &= 0x7fffffff;
717                 ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
718         }
719
720         /* A lock query - just return. */
721         if (op == SMB_F_GETLK)
722                 return ret;
723
724         /* A lock set or unset. */
725         if (!ret) {
726                 DEBUG(3,("posix_fcntl_lock: lock failed at offset %.0f count %.0f op %d type %d (%s)\n",
727                                 (double)offset,(double)count,op,type,strerror(errno)));
728
729                 /* Perhaps it doesn't support this sort of locking ? */
730                 if (errno == EINVAL) {
731 #if defined(LARGE_SMB_OFF_T)
732                         {
733                                 /*
734                                  * Ok - if we get here then we have a 64 bit lock request
735                                  * that has returned EINVAL. Try and map to 31 bits for offset
736                                  * and length and try again. This may happen if a filesystem
737                                  * doesn't support 64 bit offsets (efs/ufs) although the underlying
738                                  * OS does.
739                                  */
740                                 uint32 off_low = (orig_offset & 0xFFFFFFFF);
741                                 uint32 off_high = ((orig_offset >> 32) & 0xFFFFFFFF);
742
743                                 count = (orig_count & 0x7FFFFFFF);
744                                 offset = (SMB_OFF_T)map_lock_offset(off_high, off_low);
745                                 ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
746                                 if (!ret) {
747                                         if (errno == EINVAL) {
748                                                 DEBUG(3,("posix_fcntl_lock: locking not supported? returning True\n"));
749                                                 return(True);
750                                         }
751                                         return False;
752                                 }
753                                 DEBUG(3,("posix_fcntl_lock: 64 -> 32 bit modified lock call successful\n"));
754                                 return True;
755                         }
756 #else /* LARGE_SMB_OFF_T */
757                         DEBUG(3,("locking not supported? returning True\n"));
758                         return(True);
759 #endif /* LARGE_SMB_OFF_T */
760                 }
761
762                 return(False);
763         }
764
765         DEBUG(8,("posix_fcntl_lock: Lock call successful\n"));
766
767         return(True);
768 }
769
770 /****************************************************************************
771  POSIX function to see if a file region is locked. Returns True if the
772  region is locked, False otherwise.
773 ****************************************************************************/
774
775 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
776 {
777         SMB_OFF_T offset;
778         SMB_OFF_T count;
779         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
780
781         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
782                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
783
784         /*
785          * If the requested lock won't fit in the POSIX range, we will
786          * never set it, so presume it is not locked.
787          */
788
789         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
790                 return False;
791
792         /*
793          * Note that most UNIX's can *test* for a write lock on
794          * a read-only fd, just not *set* a write lock on a read-only
795          * fd. So we don't need to use map_lock_type here.
796          */ 
797
798         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
799 }
800
801 /*
802  * Structure used when splitting a lock range
803  * into a POSIX lock range. Doubly linked list.
804  */
805
806 struct lock_list {
807     struct lock_list *next; /* following range in the list */
808     struct lock_list *prev; /* preceding range in the list */
809     SMB_OFF_T start; /* first offset of this range */
810     SMB_OFF_T size; /* length of this range */
811 };
812
813 /****************************************************************************
814  Create a list of lock ranges that don't overlap a given range. Used in calculating
815  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
816  understand it :-).
817 ****************************************************************************/
818
819 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
820 {
821         TDB_DATA kbuf = locking_key_fsp(fsp);
822         TDB_DATA dbuf;
823         struct posix_lock *locks;
824         size_t num_locks, i;
825
826         dbuf.dptr = NULL;
827
828         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
829
830         if (!dbuf.dptr)
831                 return lhead;
832         
833         locks = (struct posix_lock *)dbuf.dptr;
834         num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
835
836         /*
837          * Check the current lock list on this dev/inode pair.
838          * Quit if the list is deleted.
839          */
840
841         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
842                 (double)lhead->start, (double)lhead->size ));
843
844         for (i=0; i<num_locks && lhead; i++) {
845
846                 struct posix_lock *lock = &locks[i];
847                 struct lock_list *l_curr;
848
849                 /*
850                  * Walk the lock list, checking for overlaps. Note that
851                  * the lock list can expand within this loop if the current
852                  * range being examined needs to be split.
853                  */
854
855                 for (l_curr = lhead; l_curr;) {
856
857                         DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
858                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
859
860                         if ( (l_curr->start >= (lock->start + lock->size)) ||
861                                  (lock->start >= (l_curr->start + l_curr->size))) {
862
863                                 /* No overlap with this lock - leave this range alone. */
864 /*********************************************
865                                              +---------+
866                                              | l_curr  |
867                                              +---------+
868                                 +-------+
869                                 | lock  |
870                                 +-------+
871 OR....
872              +---------+
873              |  l_curr |
874              +---------+
875 **********************************************/
876
877                                 DEBUG(10,("no overlap case.\n" ));
878
879                                 l_curr = l_curr->next;
880
881                         } else if ( (l_curr->start >= lock->start) &&
882                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
883
884                                 /*
885                                  * This unlock is completely overlapped by this existing lock range
886                                  * and thus should have no effect (not be unlocked). Delete it from the list.
887                                  */
888 /*********************************************
889                 +---------+
890                 |  l_curr |
891                 +---------+
892         +---------------------------+
893         |       lock                |
894         +---------------------------+
895 **********************************************/
896                                 /* Save the next pointer */
897                                 struct lock_list *ul_next = l_curr->next;
898
899                                 DEBUG(10,("delete case.\n" ));
900
901                                 DLIST_REMOVE(lhead, l_curr);
902                                 if(lhead == NULL)
903                                         break; /* No more list... */
904
905                                 l_curr = ul_next;
906                                 
907                         } else if ( (l_curr->start >= lock->start) &&
908                                                 (l_curr->start < lock->start + lock->size) &&
909                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
910
911                                 /*
912                                  * This unlock overlaps the existing lock range at the high end.
913                                  * Truncate by moving start to existing range end and reducing size.
914                                  */
915 /*********************************************
916                 +---------------+
917                 |  l_curr       |
918                 +---------------+
919         +---------------+
920         |    lock       |
921         +---------------+
922 BECOMES....
923                         +-------+
924                         | l_curr|
925                         +-------+
926 **********************************************/
927
928                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
929                                 l_curr->start = lock->start + lock->size;
930
931                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
932                                                                 (double)l_curr->start, (double)l_curr->size ));
933
934                                 l_curr = l_curr->next;
935
936                         } else if ( (l_curr->start < lock->start) &&
937                                                 (l_curr->start + l_curr->size > lock->start) &&
938                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
939
940                                 /*
941                                  * This unlock overlaps the existing lock range at the low end.
942                                  * Truncate by reducing size.
943                                  */
944 /*********************************************
945    +---------------+
946    |  l_curr       |
947    +---------------+
948            +---------------+
949            |    lock       |
950            +---------------+
951 BECOMES....
952    +-------+
953    | l_curr|
954    +-------+
955 **********************************************/
956
957                                 l_curr->size = lock->start - l_curr->start;
958
959                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
960                                                                 (double)l_curr->start, (double)l_curr->size ));
961
962                                 l_curr = l_curr->next;
963                 
964                         } else if ( (l_curr->start < lock->start) &&
965                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
966                                 /*
967                                  * Worst case scenario. Unlock request completely overlaps an existing
968                                  * lock range. Split the request into two, push the new (upper) request
969                                  * into the dlink list, and continue with the entry after ul_new (as we
970                                  * know that ul_new will not overlap with this lock).
971                                  */
972 /*********************************************
973         +---------------------------+
974         |        l_curr             |
975         +---------------------------+
976                 +---------+
977                 | lock    |
978                 +---------+
979 BECOMES.....
980         +-------+         +---------+
981         | l_curr|         | l_new   |
982         +-------+         +---------+
983 **********************************************/
984                                 struct lock_list *l_new = (struct lock_list *)talloc(ctx,
985                                                                                                         sizeof(struct lock_list));
986
987                                 if(l_new == NULL) {
988                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
989                                         return NULL; /* The talloc_destroy takes care of cleanup. */
990                                 }
991
992                                 ZERO_STRUCTP(l_new);
993                                 l_new->start = lock->start + lock->size;
994                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
995
996                                 /* Truncate the l_curr. */
997                                 l_curr->size = lock->start - l_curr->start;
998
999                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
1000 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
1001                                                                 (double)l_new->start, (double)l_new->size ));
1002
1003                                 /*
1004                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
1005                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
1006                                  */
1007
1008                                 l_new->prev = l_curr;
1009                                 l_new->next = l_curr->next;
1010                                 l_curr->next = l_new;
1011
1012                                 /* And move after the link we added. */
1013                                 l_curr = l_new->next;
1014
1015                         } else {
1016
1017                                 /*
1018                                  * This logic case should never happen. Ensure this is the
1019                                  * case by forcing an abort.... Remove in production.
1020                                  */
1021                                 pstring msg;
1022
1023                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
1024 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
1025
1026                                 smb_panic(msg);
1027                         }
1028                 } /* end for ( l_curr = lhead; l_curr;) */
1029         } /* end for (i=0; i<num_locks && ul_head; i++) */
1030
1031         if (dbuf.dptr)
1032                 free(dbuf.dptr);
1033         
1034         return lhead;
1035 }
1036
1037 /****************************************************************************
1038  POSIX function to acquire a lock. Returns True if the
1039  lock could be granted, False if not.
1040 ****************************************************************************/
1041
1042 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
1043 {
1044         SMB_OFF_T offset;
1045         SMB_OFF_T count;
1046         BOOL ret = True;
1047         size_t entry_num = 0;
1048         size_t lock_count;
1049         TALLOC_CTX *l_ctx = NULL;
1050         struct lock_list *llist = NULL;
1051         struct lock_list *ll = NULL;
1052         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1053
1054         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
1055                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
1056
1057         /*
1058          * If the requested lock won't fit in the POSIX range, we will
1059          * pretend it was successful.
1060          */
1061
1062         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1063                 return True;
1064
1065         /*
1066          * Windows is very strange. It allows read locks to be overlayed
1067          * (even over a write lock), but leaves the write lock in force until the first
1068          * unlock. It also reference counts the locks. This means the following sequence :
1069          *
1070          * process1                                      process2
1071          * ------------------------------------------------------------------------
1072          * WRITE LOCK : start = 2, len = 10
1073          *                                            READ LOCK: start =0, len = 10 - FAIL
1074          * READ LOCK : start = 0, len = 14 
1075          *                                            READ LOCK: start =0, len = 10 - FAIL
1076          * UNLOCK : start = 2, len = 10
1077          *                                            READ LOCK: start =0, len = 10 - OK
1078          *
1079          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1080          * would leave a single read lock over the 0-14 region. In order to
1081          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
1082          * entries, one for each overlayed lock request. We are guarenteed by the brlock
1083          * semantics that if a write lock is added, then it will be first in the array.
1084          */
1085         
1086         if ((l_ctx = talloc_init()) == NULL) {
1087                 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1088                 return True; /* Not a fatal error. */
1089         }
1090
1091         if ((ll = (struct lock_list *)talloc(l_ctx, sizeof(struct lock_list))) == NULL) {
1092                 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1093                 talloc_destroy(l_ctx);
1094                 return True; /* Not a fatal error. */
1095         }
1096
1097         /*
1098          * Create the initial list entry containing the
1099          * lock we want to add.
1100          */
1101
1102         ZERO_STRUCTP(ll);
1103         ll->start = offset;
1104         ll->size = count;
1105
1106         DLIST_ADD(llist, ll);
1107
1108         /*
1109          * The following call calculates if there are any
1110          * overlapping locks held by this process on
1111          * fd's open on the same file and splits this list
1112          * into a list of lock ranges that do not overlap with existing
1113          * POSIX locks.
1114          */
1115
1116         llist = posix_lock_list(l_ctx, llist, fsp);
1117
1118         /*
1119          * Now we have the list of ranges to lock it is safe to add the
1120          * entry into the POSIX lock tdb. We take note of the entry we
1121          * added here in case we have to remove it on POSIX lock fail.
1122          */
1123
1124         if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1125                 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1126                 talloc_destroy(l_ctx);
1127                 return False;
1128         }
1129
1130         /*
1131          * Add the POSIX locks on the list of ranges returned.
1132          * As the lock is supposed to be added atomically, we need to
1133          * back out all the locks if any one of these calls fail.
1134          */
1135
1136         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1137                 offset = ll->start;
1138                 count = ll->size;
1139
1140                 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1141                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1142
1143                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1144                         DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1145                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1146                         ret = False;
1147                         break;
1148                 }
1149         }
1150
1151         if (!ret) {
1152
1153                 /*
1154                  * Back out all the POSIX locks we have on fail.
1155                  */
1156
1157                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1158                         offset = ll->start;
1159                         count = ll->size;
1160
1161                         DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1162                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1163
1164                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1165                 }
1166
1167                 /*
1168                  * Remove the tdb entry for this lock.
1169                  */
1170
1171                 delete_posix_lock_entry_by_index(fsp,entry_num);
1172         }
1173
1174         talloc_destroy(l_ctx);
1175         return ret;
1176 }
1177
1178 /****************************************************************************
1179  POSIX function to release a lock. Returns True if the
1180  lock could be released, False if not.
1181 ****************************************************************************/
1182
1183 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1184 {
1185         SMB_OFF_T offset;
1186         SMB_OFF_T count;
1187         BOOL ret = True;
1188         TALLOC_CTX *ul_ctx = NULL;
1189         struct lock_list *ulist = NULL;
1190         struct lock_list *ul = NULL;
1191         struct posix_lock deleted_lock;
1192         int num_overlapped_entries;
1193
1194         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1195                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1196
1197         /*
1198          * If the requested lock won't fit in the POSIX range, we will
1199          * pretend it was successful.
1200          */
1201
1202         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1203                 return True;
1204
1205         /*
1206          * We treat this as one unlock request for POSIX accounting purposes even
1207          * if it may later be split into multiple smaller POSIX unlock ranges.
1208          * num_overlapped_entries is the number of existing locks that have any
1209          * overlap with this unlock request.
1210          */ 
1211
1212         num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1213
1214         if (num_overlapped_entries == -1) {
1215         smb_panic("release_posix_lock: unable find entry to delete !\n");
1216         }
1217
1218         /*
1219          * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1220          * a POSIX write lock, then before doing the unlock we need to downgrade
1221          * the POSIX lock to a read lock. This allows any overlapping read locks
1222          * to be atomically maintained.
1223          */
1224
1225         if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1226                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1227                         DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1228                         return False;
1229                 }
1230         }
1231
1232         if ((ul_ctx = talloc_init()) == NULL) {
1233                 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1234                 return True; /* Not a fatal error. */
1235         }
1236
1237         if ((ul = (struct lock_list *)talloc(ul_ctx, sizeof(struct lock_list))) == NULL) {
1238                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1239                 talloc_destroy(ul_ctx);
1240                 return True; /* Not a fatal error. */
1241         }
1242
1243         /*
1244          * Create the initial list entry containing the
1245          * lock we want to remove.
1246          */
1247
1248         ZERO_STRUCTP(ul);
1249         ul->start = offset;
1250         ul->size = count;
1251
1252         DLIST_ADD(ulist, ul);
1253
1254         /*
1255          * The following call calculates if there are any
1256          * overlapping locks held by this process on
1257          * fd's open on the same file and creates a
1258          * list of unlock ranges that will allow
1259          * POSIX lock ranges to remain on the file whilst the
1260          * unlocks are performed.
1261          */
1262
1263         ulist = posix_lock_list(ul_ctx, ulist, fsp);
1264
1265         /*
1266          * Release the POSIX locks on the list of ranges returned.
1267          */
1268
1269         for(; ulist; ulist = ulist->next) {
1270                 offset = ulist->start;
1271                 count = ulist->size;
1272
1273                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1274                         (double)offset, (double)count ));
1275
1276                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1277                         ret = False;
1278         }
1279
1280         talloc_destroy(ul_ctx);
1281
1282         return ret;
1283 }
1284
1285 /****************************************************************************
1286  Remove all lock entries for a specific dev/inode pair from the tdb.
1287 ****************************************************************************/
1288
1289 static void delete_posix_lock_entries(files_struct *fsp)
1290 {
1291         TDB_DATA kbuf = locking_key_fsp(fsp);
1292
1293         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1294                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1295 }
1296
1297 /****************************************************************************
1298  Debug function.
1299 ****************************************************************************/
1300
1301 static void dump_entry(struct posix_lock *pl)
1302 {
1303         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1304                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1305 }
1306
1307 /****************************************************************************
1308  Remove any locks on this fd. Called from file_close().
1309 ****************************************************************************/
1310
1311 void posix_locking_close_file(files_struct *fsp)
1312 {
1313         struct posix_lock *entries = NULL;
1314         size_t count, i;
1315
1316         /*
1317          * Optimization for the common case where we are the only
1318          * opener of a file. If all fd entries are our own, we don't
1319          * need to explicitly release all the locks via the POSIX functions,
1320          * we can just remove all the entries in the tdb and allow the
1321          * close to remove the real locks.
1322          */
1323
1324         count = get_posix_lock_entries(fsp, &entries);
1325
1326         if (count == 0) {
1327                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1328                 return;
1329         }
1330
1331         for (i = 0; i < count; i++) {
1332                 if (entries[i].fd != fsp->fd )
1333                         break;
1334
1335                 dump_entry(&entries[i]);
1336         }
1337
1338         if (i == count) {
1339                 /* All locks are ours. */
1340                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n", 
1341                         fsp->fsp_name, (unsigned int)count ));
1342                 free((char *)entries);
1343                 delete_posix_lock_entries(fsp);
1344                 return;
1345         }
1346
1347         /*
1348          * Difficult case. We need to delete all our locks, whilst leaving
1349          * all other POSIX locks in place.
1350          */
1351
1352         for (i = 0; i < count; i++) {
1353                 struct posix_lock *pl = &entries[i];
1354                 if (pl->fd == fsp->fd)
1355                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1356         }
1357         free((char *)entries);
1358 }
1359
1360 /*******************************************************************
1361  Create the in-memory POSIX lock databases.
1362 ********************************************************************/
1363
1364 BOOL posix_locking_init(int read_only)
1365 {
1366         if (posix_lock_tdb && posix_pending_close_tdb)
1367                 return True;
1368         
1369         if (!posix_lock_tdb)
1370                 posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1371                                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1372         if (!posix_lock_tdb) {
1373                 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1374                 return False;
1375         }
1376         if (!posix_pending_close_tdb)
1377                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1378                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1379         if (!posix_pending_close_tdb) {
1380                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1381                 return False;
1382         }
1383
1384         return True;
1385 }
1386
1387 /*******************************************************************
1388  Delete the in-memory POSIX lock databases.
1389 ********************************************************************/
1390
1391 BOOL posix_locking_end(void)
1392 {
1393     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1394                 return False;
1395     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1396                 return False;
1397         return True;
1398 }