r16307: Make sure we know we must pass a valid pointer here.
[samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2000
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20    Revision History:
21
22    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
23 */
24
25 #include "includes.h"
26
27 #undef DBGC_CLASS
28 #define DBGC_CLASS DBGC_LOCKING
29
30 /*
31  * The POSIX locking database handle.
 *
 * Records are looked up with the dev/inode key built by locking_key()
 * and are read and written as arrays of struct posix_lock.
32  */
33
34 static TDB_CONTEXT *posix_lock_tdb;
35
36 /*
37  * The pending close database handle.
 *
 * Looked up with the same dev/inode key; each record is an array of
 * int file descriptors (see add_fd_to_close_entry()).
38  */
39
40 static TDB_CONTEXT *posix_pending_close_tdb;
41
42 /*
43  * The data in POSIX lock records is an unsorted linear array of these
44  * records.  It is unnecessary to store the count as tdb provides the
45  * size of the record (readers divide dsize by sizeof(struct posix_lock)).
46  */
47
48 struct posix_lock {
49         int fd;                 /* fsp->fh->fd of the handle the lock was added for */
50         SMB_OFF_T start;        /* start offset of the locked range */
51         SMB_OFF_T size;         /* length of the locked range */
52         int lock_type;          /* lock type given to add_posix_lock_entry(); F_RDLCK prints as "READ" */
53 };
54
55 /*
56  * The data in POSIX pending close records is an unsorted linear array of int
57  * records.  It is unnecessary to store the count as tdb provides the
58  * size of the record.
59  */
60
61 /* The key used in both the POSIX databases. locking_key() zero-fills
   the whole struct before setting the two fields. */
62
63 struct posix_lock_key {
64         SMB_DEV_T device;       /* device number (fsp->dev) */
65         SMB_INO_T inode;        /* inode number (fsp->inode) */
66 }; 
67
68 /*******************************************************************
69  Form a static locking key for a dev/inode pair.
70 ******************************************************************/
71
72 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
73 {
74         static struct posix_lock_key key;
75         TDB_DATA kbuf;
76
77         memset(&key, '\0', sizeof(key));
78         key.device = dev;
79         key.inode = inode;
80         kbuf.dptr = (char *)&key;
81         kbuf.dsize = sizeof(key);
82         return kbuf;
83 }
84
85 /*******************************************************************
86  Convenience function to get a key from an fsp.
87 ******************************************************************/
88
89 static TDB_DATA locking_key_fsp(files_struct *fsp)
90 {
91         return locking_key(fsp->dev, fsp->inode);
92 }
93
94 /****************************************************************************
95  Add an fd to the pending close tdb.
96 ****************************************************************************/
97
98 static BOOL add_fd_to_close_entry(files_struct *fsp)
99 {
100         TDB_DATA kbuf = locking_key_fsp(fsp);
101         TDB_DATA dbuf;
102
103         dbuf.dptr = NULL;
104         dbuf.dsize = 0;
105
106         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
107
108         dbuf.dptr = SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(int));
109         if (!dbuf.dptr) {
110                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
111                 return False;
112         }
113
114         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fh->fd, sizeof(int));
115         dbuf.dsize += sizeof(int);
116
117         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
118                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
119         }
120
121         SAFE_FREE(dbuf.dptr);
122         return True;
123 }
124
125 /****************************************************************************
126  Remove all fd entries for a specific dev/inode pair from the tdb.
127 ****************************************************************************/
128
129 static void delete_close_entries(files_struct *fsp)
130 {
131         TDB_DATA kbuf = locking_key_fsp(fsp);
132
133         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
134                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
135 }
136
137 /****************************************************************************
138  Get the array of POSIX pending close records for an open fsp. Caller must
139  free. Returns number of entries.
140 ****************************************************************************/
141
142 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
143 {
144         TDB_DATA kbuf = locking_key_fsp(fsp);
145         TDB_DATA dbuf;
146         size_t count = 0;
147
148         *entries = NULL;
149         dbuf.dptr = NULL;
150
151         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
152
153         if (!dbuf.dptr) {
154                 return 0;
155         }
156
157         *entries = (int *)dbuf.dptr;
158         count = (size_t)(dbuf.dsize / sizeof(int));
159
160         return count;
161 }
162
163 /****************************************************************************
164  Get the array of POSIX locks for an fsp. Caller must free. Returns
165  number of entries.
166 ****************************************************************************/
167
168 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
169 {
170         TDB_DATA kbuf = locking_key_fsp(fsp);
171         TDB_DATA dbuf;
172         size_t count = 0;
173
174         *entries = NULL;
175
176         dbuf.dptr = NULL;
177
178         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
179
180         if (!dbuf.dptr) {
181                 return 0;
182         }
183
184         *entries = (struct posix_lock *)dbuf.dptr;
185         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
186
187         return count;
188 }
189
190 /****************************************************************************
191  Deal with pending closes needed by POSIX locking support.
192  Note that posix_locking_close_file() is expected to have been called
193  to delete all locks on this fsp before this function is called.
194 ****************************************************************************/
195
196 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
197 {
198         int saved_errno = 0;
199         int ret;
200         size_t count, i;
201         struct posix_lock *entries = NULL;
202         int *fd_array = NULL;
203         BOOL locks_on_other_fds = False;
204
205         if (!lp_posix_locking(SNUM(conn))) {
206                 /*
207                  * No POSIX to worry about, just close.
208                  */
209                 ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
210                 fsp->fh->fd = -1;
211                 return ret;
212         }
213
214         /*
215          * Get the number of outstanding POSIX locks on this dev/inode pair.
216          */
217
218         count = get_posix_lock_entries(fsp, &entries);
219
220         /*
221          * Check if there are any outstanding locks belonging to
222          * other fd's. This should never be the case if posix_locking_close_file()
223          * has been called first, but it never hurts to be *sure*.
224          */
225
226         for (i = 0; i < count; i++) {
227                 if (entries[i].fd != fsp->fh->fd) {
228                         locks_on_other_fds = True;
229                         break;
230                 }
231         }
232
233         if (locks_on_other_fds) {
234
235                 /*
236                  * There are outstanding locks on this dev/inode pair on other fds.
237                  * Add our fd to the pending close tdb and set fsp->fh->fd to -1.
238                  */
239
240                 if (!add_fd_to_close_entry(fsp)) {
241                         SAFE_FREE(entries);
242                         return -1;
243                 }
244
245                 SAFE_FREE(entries);
246                 fsp->fh->fd = -1;
247                 return 0;
248         }
249
250         SAFE_FREE(entries);
251
252         /*
253          * No outstanding POSIX locks. Get the pending close fd's
254          * from the tdb and close them all.
255          */
256
257         count = get_posix_pending_close_entries(fsp, &fd_array);
258
259         if (count) {
260                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
261
262                 for(i = 0; i < count; i++) {
263                         if (SMB_VFS_CLOSE(fsp,fd_array[i]) == -1) {
264                                 saved_errno = errno;
265                         }
266                 }
267
268                 /*
269                  * Delete all fd's stored in the tdb
270                  * for this dev/inode pair.
271                  */
272
273                 delete_close_entries(fsp);
274         }
275
276         SAFE_FREE(fd_array);
277
278         /*
279          * Finally close the fd associated with this fsp.
280          */
281
282         ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
283
284         if (saved_errno != 0) {
285                 errno = saved_errno;
286                 ret = -1;
287         } 
288
289         fsp->fh->fd = -1;
290
291         return ret;
292 }
293
/****************************************************************************
 Debugging aid: printable name for an fcntl lock type.
 F_RDLCK gives "READ"; every other value gives "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
        if (lock_type == F_RDLCK) {
                return "READ";
        }

        return "WRITE";
}
302
303 /****************************************************************************
304  Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
305  then the POSIX fcntl lock fails.
306 ****************************************************************************/
307
308 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
309 {
310         TDB_DATA kbuf = locking_key_fsp(fsp);
311         TDB_DATA dbuf;
312         struct posix_lock *locks;
313         size_t count;
314
315         dbuf.dptr = NULL;
316         
317         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
318
319         if (!dbuf.dptr) {
320                 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
321                 goto fail;
322         }
323
324         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
325         locks = (struct posix_lock *)dbuf.dptr;
326
327         if (count == 1) {
328                 tdb_delete(posix_lock_tdb, kbuf);
329         } else {
330                 if (entry < count-1) {
331                         memmove(&locks[entry], &locks[entry+1], sizeof(struct posix_lock)*((count-1) - entry));
332                 }
333                 dbuf.dsize -= sizeof(struct posix_lock);
334                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
335         }
336
337         SAFE_FREE(dbuf.dptr);
338
339         return True;
340
341  fail:
342
343         SAFE_FREE(dbuf.dptr);
344         return False;
345 }
346
347 /****************************************************************************
348  Add an entry into the POSIX locking tdb. We return the index number of the
349  added lock (used in case we need to delete *exactly* this entry). Returns
350  False on fail, True on success.
351 ****************************************************************************/
352
353 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
354 {
355         TDB_DATA kbuf = locking_key_fsp(fsp);
356         TDB_DATA dbuf;
357         struct posix_lock pl;
358
359         dbuf.dptr = NULL;
360         dbuf.dsize = 0;
361
362         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
363
364         *pentry_num = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
365
366         /*
367          * Add new record.
368          */
369
370         pl.fd = fsp->fh->fd;
371         pl.start = start;
372         pl.size = size;
373         pl.lock_type = lock_type;
374
375         dbuf.dptr = SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(struct posix_lock));
376         if (!dbuf.dptr) {
377                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
378                 goto fail;
379         }
380
381         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(struct posix_lock));
382         dbuf.dsize += sizeof(struct posix_lock);
383
384         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
385                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
386                 goto fail;
387         }
388
389         SAFE_FREE(dbuf.dptr);
390
391         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
392                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
393                         (double)fsp->dev, (double)fsp->inode ));
394
395         return True;
396
397  fail:
398
399         SAFE_FREE(dbuf.dptr);
400         return False;
401 }
402
403 /****************************************************************************
404  Calculate if locks have any overlap at all.
405 ****************************************************************************/
406
407 static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
408 {
409         if (start1 >= start2 && start1 <= start2 + size2)
410                 return True;
411
412         if (start1 < start2 && start1 + size1 > start2)
413                 return True;
414
415         return False;
416 }
417
418 /****************************************************************************
419  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
420  deleted and the number of records that are overlapped by this one, or -1 on error.
421 ****************************************************************************/
422
423 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
424 {
425         TDB_DATA kbuf = locking_key_fsp(fsp);
426         TDB_DATA dbuf;
427         struct posix_lock *locks;
428         size_t i, count;
429         BOOL found = False;
430         int num_overlapping_records = 0;
431
432         dbuf.dptr = NULL;
433         
434         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
435
436         if (!dbuf.dptr) {
437                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
438                 goto fail;
439         }
440
441         /* There are existing locks - find a match. */
442         locks = (struct posix_lock *)dbuf.dptr;
443         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
444
445         /*
446          * Search for and delete the first record that matches the
447          * unlock criteria.
448          */
449
450         for (i=0; i<count; i++) { 
451                 struct posix_lock *entry = &locks[i];
452
453                 if (entry->fd == fsp->fh->fd &&
454                         entry->start == start &&
455                         entry->size == size) {
456
457                         /* Make a copy */
458                         *pl = *entry;
459
460                         /* Found it - delete it. */
461                         if (count == 1) {
462                                 tdb_delete(posix_lock_tdb, kbuf);
463                         } else {
464                                 if (i < count-1) {
465                                         memmove(&locks[i], &locks[i+1], sizeof(struct posix_lock)*((count-1) - i));
466                                 }
467                                 dbuf.dsize -= sizeof(struct posix_lock);
468                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
469                         }
470                         count--;
471                         found = True;
472                         break;
473                 }
474         }
475
476         if (!found)
477                 goto fail;
478
479         /*
480          * Count the number of entries that are
481          * overlapped by this unlock request.
482          */
483
484         for (i = 0; i < count; i++) {
485                 struct posix_lock *entry = &locks[i];
486
487                 if (fsp->fh->fd == entry->fd &&
488                         does_lock_overlap( start, size, entry->start, entry->size))
489                                 num_overlapping_records++;
490         }
491
492         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
493                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
494                                 (unsigned int)num_overlapping_records ));
495
496         SAFE_FREE(dbuf.dptr);
497
498         return num_overlapping_records;
499
500  fail:
501
502         SAFE_FREE(dbuf.dptr);
503         return -1;
504 }
505
506 /****************************************************************************
507  Utility function to map a lock type correctly depending on the open
508  mode of a file.
509 ****************************************************************************/
510
511 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
512 {
513         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
514                 /*
515                  * Many UNIX's cannot get a write lock on a file opened read-only.
516                  * Win32 locking semantics allow this.
517                  * Do the best we can and attempt a read-only lock.
518                  */
519                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
520                 return F_RDLCK;
521         }
522 #if 0
523         /* We no longer open files write-only. */
524          else if((lock_type == READ_LOCK) && !fsp->can_read) {
525                 /*
526                  * Ditto for read locks on write only files.
527                  */
528                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
529                 return F_WRLCK;
530         }
531 #endif
532
533         /*
534          * This return should be the most normal, as we attempt
535          * to always open files read/write.
536          */
537
538         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
539 }
540
541 /****************************************************************************
542  Check to see if the given unsigned lock range is within the possible POSIX
543  range. Modifies the given args to be in range if possible, just returns
544  False if not.
545 ****************************************************************************/
546
547 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
548                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
549 {
550         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
551         SMB_OFF_T count = (SMB_OFF_T)u_count;
552
553         /*
554          * For the type of system we are, attempt to
555          * find the maximum positive lock offset as an SMB_OFF_T.
556          */
557
558 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
559
560         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
561
562 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
563
564         /*
565          * In this case SMB_OFF_T is 64 bits,
566          * and the underlying system can handle 64 bit signed locks.
567          */
568
569         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
570         SMB_OFF_T mask = (mask2<<1);
571         SMB_OFF_T max_positive_lock_offset = ~mask;
572
573 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
574
575         /*
576          * In this case either SMB_OFF_T is 32 bits,
577          * or the underlying system cannot handle 64 bit signed locks.
578          * All offsets & counts must be 2^31 or less.
579          */
580
581         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
582
583 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
584
585         /*
586          * POSIX locks of length zero mean lock to end-of-file.
587          * Win32 locks of length zero are point probes. Ignore
588          * any Win32 locks of length zero. JRA.
589          */
590
591         if (count == (SMB_OFF_T)0) {
592                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
593                 return False;
594         }
595
596         /*
597          * If the given offset was > max_positive_lock_offset then we cannot map this at all
598          * ignore this lock.
599          */
600
601         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
602                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
603                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
604                 return False;
605         }
606
607         /*
608          * We must truncate the count to less than max_positive_lock_offset.
609          */
610
611         if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset))
612                 count = max_positive_lock_offset;
613
614         /*
615          * Truncate count to end at max lock offset.
616          */
617
618         if (offset + count < 0 || offset + count > max_positive_lock_offset)
619                 count = max_positive_lock_offset - offset;
620
621         /*
622          * If we ate all the count, ignore this lock.
623          */
624
625         if (count == 0) {
626                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
627                                 (double)u_offset, (double)u_count ));
628                 return False;
629         }
630
631         /*
632          * The mapping was successful.
633          */
634
635         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
636                         (double)offset, (double)count ));
637
638         *offset_out = offset;
639         *count_out = count;
640         
641         return True;
642 }
643
644 /****************************************************************************
645  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
646  broken NFS implementations.
647 ****************************************************************************/
648
649 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
650 {
651         BOOL ret;
652
653         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
654
655         ret = SMB_VFS_LOCK(fsp,fsp->fh->fd,op,offset,count,type);
656
657         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
658
659                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
660                                         (double)offset,(double)count));
661                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
662                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
663
664                 /*
665                  * If the offset is > 0x7FFFFFFF then this will cause problems on
666                  * 32 bit NFS mounted filesystems. Just ignore it.
667                  */
668
669                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
670                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
671                         return True;
672                 }
673
674                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
675                         /* 32 bit NFS file system, retry with smaller offset */
676                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
677                         errno = 0;
678                         count &= 0x7fffffff;
679                         ret = SMB_VFS_LOCK(fsp,fsp->fh->fd,op,offset,count,type);
680                 }
681         }
682
683         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
684         return ret;
685 }
686
687 /****************************************************************************
688  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
689  broken NFS implementations.
690 ****************************************************************************/
691
692 static BOOL posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
693 {
694         pid_t pid;
695         BOOL ret;
696
697         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
698                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
699
700         ret = SMB_VFS_GETLOCK(fsp,fsp->fh->fd,poffset,pcount,ptype,&pid);
701
702         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
703
704                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
705                                         (double)*poffset,(double)*pcount));
706                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
707                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
708
709                 /*
710                  * If the offset is > 0x7FFFFFFF then this will cause problems on
711                  * 32 bit NFS mounted filesystems. Just ignore it.
712                  */
713
714                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
715                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
716                         return True;
717                 }
718
719                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
720                         /* 32 bit NFS file system, retry with smaller offset */
721                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
722                         errno = 0;
723                         *pcount &= 0x7fffffff;
724                         ret = SMB_VFS_GETLOCK(fsp,fsp->fh->fd,poffset,pcount,ptype,&pid);
725                 }
726         }
727
728         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
729         return ret;
730 }
731
732
733 /****************************************************************************
734  POSIX function to see if a file region is locked. Returns True if the
735  region is locked, False otherwise.
736 ****************************************************************************/
737
738 BOOL is_posix_locked(files_struct *fsp,
739                         SMB_BIG_UINT *pu_offset,
740                         SMB_BIG_UINT *pu_count,
741                         enum brl_type *plock_type,
742                         enum brl_flavour lock_flav)
743 {
744         SMB_OFF_T offset;
745         SMB_OFF_T count;
746         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
747
748         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
749                 fsp->fsp_name, (double)*pu_offset, (double)*pu_count, posix_lock_type_name(*plock_type) ));
750
751         /*
752          * If the requested lock won't fit in the POSIX range, we will
753          * never set it, so presume it is not locked.
754          */
755
756         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
757                 return False;
758         }
759
760         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
761                 return False;
762         }
763
764         if (posix_lock_type == F_UNLCK) {
765                 return False;
766         }
767
768         if (lock_flav == POSIX_LOCK) {
769                 /* Only POSIX lock queries need to know the details. */
770                 *pu_offset = (SMB_BIG_UINT)offset;
771                 *pu_count = (SMB_BIG_UINT)count;
772                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
773         }
774         return True;
775 }
776
777 /*
778  * Structure used when splitting a lock range
779  * into a POSIX lock range. Doubly linked list.
 *
 * posix_lock_list() walks a list of these and removes or adjusts
 * elements that overlap existing lock records.
780  */
781
782 struct lock_list {
783         struct lock_list *next;  /* following element, NULL at the tail */
784         struct lock_list *prev;  /* preceding element */
785         SMB_OFF_T start;         /* start offset of this range */
786         SMB_OFF_T size;          /* length of this range */
787 };
788
789 /****************************************************************************
790  Create a list of lock ranges that don't overlap a given range. Used in calculating
791  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
792  understand it :-).
793 ****************************************************************************/
794
795 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
796 {
797         TDB_DATA kbuf = locking_key_fsp(fsp);
798         TDB_DATA dbuf;
799         struct posix_lock *locks;
800         size_t num_locks, i;
801
802         dbuf.dptr = NULL;
803
804         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
805
806         if (!dbuf.dptr)
807                 return lhead;
808         
809         locks = (struct posix_lock *)dbuf.dptr;
810         num_locks = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
811
812         /*
813          * Check the current lock list on this dev/inode pair.
814          * Quit if the list is deleted.
815          */
816
817         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
818                 (double)lhead->start, (double)lhead->size ));
819
820         for (i=0; i<num_locks && lhead; i++) {
821
822                 struct posix_lock *lock = &locks[i];
823                 struct lock_list *l_curr;
824
825                 /*
826                  * Walk the lock list, checking for overlaps. Note that
827                  * the lock list can expand within this loop if the current
828                  * range being examined needs to be split.
829                  */
830
831                 for (l_curr = lhead; l_curr;) {
832
833                         DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
834                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
835
836                         if ( (l_curr->start >= (lock->start + lock->size)) ||
837                                  (lock->start >= (l_curr->start + l_curr->size))) {
838
839                                 /* No overlap with this lock - leave this range alone. */
840 /*********************************************
841                                              +---------+
842                                              | l_curr  |
843                                              +---------+
844                                 +-------+
845                                 | lock  |
846                                 +-------+
847 OR....
848              +---------+
849              |  l_curr |
850              +---------+
851 **********************************************/
852
853                                 DEBUG(10,("no overlap case.\n" ));
854
855                                 l_curr = l_curr->next;
856
857                         } else if ( (l_curr->start >= lock->start) &&
858                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
859
860                                 /*
861                                  * This unlock is completely overlapped by this existing lock range
862                                  * and thus should have no effect (not be unlocked). Delete it from the list.
863                                  */
864 /*********************************************
865                 +---------+
866                 |  l_curr |
867                 +---------+
868         +---------------------------+
869         |       lock                |
870         +---------------------------+
871 **********************************************/
872                                 /* Save the next pointer */
873                                 struct lock_list *ul_next = l_curr->next;
874
875                                 DEBUG(10,("delete case.\n" ));
876
877                                 DLIST_REMOVE(lhead, l_curr);
878                                 if(lhead == NULL)
879                                         break; /* No more list... */
880
881                                 l_curr = ul_next;
882                                 
883                         } else if ( (l_curr->start >= lock->start) &&
884                                                 (l_curr->start < lock->start + lock->size) &&
885                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
886
887                                 /*
888                                  * This unlock overlaps the existing lock range at the high end.
889                                  * Truncate by moving start to existing range end and reducing size.
890                                  */
891 /*********************************************
892                 +---------------+
893                 |  l_curr       |
894                 +---------------+
895         +---------------+
896         |    lock       |
897         +---------------+
898 BECOMES....
899                         +-------+
900                         | l_curr|
901                         +-------+
902 **********************************************/
903
904                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
905                                 l_curr->start = lock->start + lock->size;
906
907                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
908                                                                 (double)l_curr->start, (double)l_curr->size ));
909
910                                 l_curr = l_curr->next;
911
912                         } else if ( (l_curr->start < lock->start) &&
913                                                 (l_curr->start + l_curr->size > lock->start) &&
914                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
915
916                                 /*
917                                  * This unlock overlaps the existing lock range at the low end.
918                                  * Truncate by reducing size.
919                                  */
920 /*********************************************
921    +---------------+
922    |  l_curr       |
923    +---------------+
924            +---------------+
925            |    lock       |
926            +---------------+
927 BECOMES....
928    +-------+
929    | l_curr|
930    +-------+
931 **********************************************/
932
933                                 l_curr->size = lock->start - l_curr->start;
934
935                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
936                                                                 (double)l_curr->start, (double)l_curr->size ));
937
938                                 l_curr = l_curr->next;
939                 
940                         } else if ( (l_curr->start < lock->start) &&
941                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
942                                 /*
943                                  * Worst case scenario. Unlock request completely overlaps an existing
944                                  * lock range. Split the request into two, push the new (upper) request
945                                  * into the dlink list, and continue with the entry after ul_new (as we
946                                  * know that ul_new will not overlap with this lock).
947                                  */
948 /*********************************************
949         +---------------------------+
950         |        l_curr             |
951         +---------------------------+
952                 +---------+
953                 | lock    |
954                 +---------+
955 BECOMES.....
956         +-------+         +---------+
957         | l_curr|         | l_new   |
958         +-------+         +---------+
959 **********************************************/
960                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
961
962                                 if(l_new == NULL) {
963                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
964                                         return NULL; /* The talloc_destroy takes care of cleanup. */
965                                 }
966
967                                 ZERO_STRUCTP(l_new);
968                                 l_new->start = lock->start + lock->size;
969                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
970
971                                 /* Truncate the l_curr. */
972                                 l_curr->size = lock->start - l_curr->start;
973
974                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
975 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
976                                                                 (double)l_new->start, (double)l_new->size ));
977
978                                 /*
979                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
980                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
981                                  */
982
983                                 l_new->prev = l_curr;
984                                 l_new->next = l_curr->next;
985                                 l_curr->next = l_new;
986
987                                 /* And move after the link we added. */
988                                 l_curr = l_new->next;
989
990                         } else {
991
992                                 /*
993                                  * This logic case should never happen. Ensure this is the
994                                  * case by forcing an abort.... Remove in production.
995                                  */
996                                 pstring msg;
997
998                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
999 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
1000
1001                                 smb_panic(msg);
1002                         }
1003                 } /* end for ( l_curr = lhead; l_curr;) */
1004         } /* end for (i=0; i<num_locks && ul_head; i++) */
1005
1006         SAFE_FREE(dbuf.dptr);
1007         
1008         return lhead;
1009 }
1010
1011 /****************************************************************************
1012  POSIX function to acquire a lock. Returns True if the
1013  lock could be granted, False if not.
1014  TODO -- Fix POSIX lock flavour semantics.
1015 ****************************************************************************/
1016
1017 BOOL set_posix_lock(files_struct *fsp,
1018                         SMB_BIG_UINT u_offset,
1019                         SMB_BIG_UINT u_count,
1020                         enum brl_type lock_type,
1021                         enum brl_flavour lock_flav)
1022 {
1023         SMB_OFF_T offset;
1024         SMB_OFF_T count;
1025         BOOL ret = True;
1026         size_t entry_num = 0;
1027         size_t lock_count;
1028         TALLOC_CTX *l_ctx = NULL;
1029         struct lock_list *llist = NULL;
1030         struct lock_list *ll = NULL;
1031         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1032
1033         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
1034                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
1035
1036         /*
1037          * If the requested lock won't fit in the POSIX range, we will
1038          * pretend it was successful.
1039          */
1040
1041         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1042                 return True;
1043
1044         /*
1045          * Windows is very strange. It allows read locks to be overlayed
1046          * (even over a write lock), but leaves the write lock in force until the first
1047          * unlock. It also reference counts the locks. This means the following sequence :
1048          *
1049          * process1                                      process2
1050          * ------------------------------------------------------------------------
1051          * WRITE LOCK : start = 2, len = 10
1052          *                                            READ LOCK: start =0, len = 10 - FAIL
1053          * READ LOCK : start = 0, len = 14 
1054          *                                            READ LOCK: start =0, len = 10 - FAIL
1055          * UNLOCK : start = 2, len = 10
1056          *                                            READ LOCK: start =0, len = 10 - OK
1057          *
1058          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1059          * would leave a single read lock over the 0-14 region. In order to
1060          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
1061          * entries, one for each overlayed lock request. We are guarenteed by the brlock
1062          * semantics that if a write lock is added, then it will be first in the array.
1063          */
1064         
1065         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1066                 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1067                 return True; /* Not a fatal error. */
1068         }
1069
1070         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
1071                 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1072                 talloc_destroy(l_ctx);
1073                 return True; /* Not a fatal error. */
1074         }
1075
1076         /*
1077          * Create the initial list entry containing the
1078          * lock we want to add.
1079          */
1080
1081         ZERO_STRUCTP(ll);
1082         ll->start = offset;
1083         ll->size = count;
1084
1085         DLIST_ADD(llist, ll);
1086
1087         /*
1088          * The following call calculates if there are any
1089          * overlapping locks held by this process on
1090          * fd's open on the same file and splits this list
1091          * into a list of lock ranges that do not overlap with existing
1092          * POSIX locks.
1093          */
1094
1095         llist = posix_lock_list(l_ctx, llist, fsp);
1096
1097         /*
1098          * Now we have the list of ranges to lock it is safe to add the
1099          * entry into the POSIX lock tdb. We take note of the entry we
1100          * added here in case we have to remove it on POSIX lock fail.
1101          */
1102
1103         if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1104                 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1105                 talloc_destroy(l_ctx);
1106                 return False;
1107         }
1108
1109         /*
1110          * Add the POSIX locks on the list of ranges returned.
1111          * As the lock is supposed to be added atomically, we need to
1112          * back out all the locks if any one of these calls fail.
1113          */
1114
1115         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1116                 offset = ll->start;
1117                 count = ll->size;
1118
1119                 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1120                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1121
1122                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1123                         DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1124                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1125                         ret = False;
1126                         break;
1127                 }
1128         }
1129
1130         if (!ret) {
1131
1132                 /*
1133                  * Back out all the POSIX locks we have on fail.
1134                  */
1135
1136                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1137                         offset = ll->start;
1138                         count = ll->size;
1139
1140                         DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1141                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1142
1143                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1144                 }
1145
1146                 /*
1147                  * Remove the tdb entry for this lock.
1148                  */
1149
1150                 delete_posix_lock_entry_by_index(fsp,entry_num);
1151         }
1152
1153         talloc_destroy(l_ctx);
1154         return ret;
1155 }
1156
1157 /****************************************************************************
1158  POSIX function to release a lock. Returns True if the
1159  lock could be released, False if not.
1160 ****************************************************************************/
1161
1162 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1163 {
1164         SMB_OFF_T offset;
1165         SMB_OFF_T count;
1166         BOOL ret = True;
1167         TALLOC_CTX *ul_ctx = NULL;
1168         struct lock_list *ulist = NULL;
1169         struct lock_list *ul = NULL;
1170         struct posix_lock deleted_lock;
1171         int num_overlapped_entries;
1172
1173         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1174                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1175
1176         /*
1177          * If the requested lock won't fit in the POSIX range, we will
1178          * pretend it was successful.
1179          */
1180
1181         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1182                 return True;
1183
1184         /*
1185          * We treat this as one unlock request for POSIX accounting purposes even
1186          * if it may later be split into multiple smaller POSIX unlock ranges.
1187          * num_overlapped_entries is the number of existing locks that have any
1188          * overlap with this unlock request.
1189          */ 
1190
1191         num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1192
1193         if (num_overlapped_entries == -1) {
1194                 smb_panic("release_posix_lock: unable find entry to delete !\n");
1195         }
1196
1197         /*
1198          * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1199          * a POSIX write lock, then before doing the unlock we need to downgrade
1200          * the POSIX lock to a read lock. This allows any overlapping read locks
1201          * to be atomically maintained.
1202          */
1203
1204         if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1205                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1206                         DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1207                         return False;
1208                 }
1209         }
1210
1211         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1212                 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1213                 return True; /* Not a fatal error. */
1214         }
1215
1216         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1217                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1218                 talloc_destroy(ul_ctx);
1219                 return True; /* Not a fatal error. */
1220         }
1221
1222         /*
1223          * Create the initial list entry containing the
1224          * lock we want to remove.
1225          */
1226
1227         ZERO_STRUCTP(ul);
1228         ul->start = offset;
1229         ul->size = count;
1230
1231         DLIST_ADD(ulist, ul);
1232
1233         /*
1234          * The following call calculates if there are any
1235          * overlapping locks held by this process on
1236          * fd's open on the same file and creates a
1237          * list of unlock ranges that will allow
1238          * POSIX lock ranges to remain on the file whilst the
1239          * unlocks are performed.
1240          */
1241
1242         ulist = posix_lock_list(ul_ctx, ulist, fsp);
1243
1244         /*
1245          * Release the POSIX locks on the list of ranges returned.
1246          */
1247
1248         for(; ulist; ulist = ulist->next) {
1249                 offset = ulist->start;
1250                 count = ulist->size;
1251
1252                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1253                         (double)offset, (double)count ));
1254
1255                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1256                         ret = False;
1257         }
1258
1259         talloc_destroy(ul_ctx);
1260
1261         return ret;
1262 }
1263
1264 /****************************************************************************
1265  Remove all lock entries for a specific dev/inode pair from the tdb.
1266 ****************************************************************************/
1267
1268 static void delete_posix_lock_entries(files_struct *fsp)
1269 {
1270         TDB_DATA kbuf = locking_key_fsp(fsp);
1271
1272         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1273                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1274 }
1275
1276 /****************************************************************************
1277  Debug function.
1278 ****************************************************************************/
1279
1280 static void dump_entry(struct posix_lock *pl)
1281 {
1282         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1283                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1284 }
1285
1286 /****************************************************************************
1287  Remove any locks on this fd. Called from file_close().
1288 ****************************************************************************/
1289
1290 void posix_locking_close_file(files_struct *fsp)
1291 {
1292         struct posix_lock *entries = NULL;
1293         size_t count, i;
1294
1295         /*
1296          * Optimization for the common case where we are the only
1297          * opener of a file. If all fd entries are our own, we don't
1298          * need to explicitly release all the locks via the POSIX functions,
1299          * we can just remove all the entries in the tdb and allow the
1300          * close to remove the real locks.
1301          */
1302
1303         count = get_posix_lock_entries(fsp, &entries);
1304
1305         if (count == 0) {
1306                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1307                 return;
1308         }
1309
1310         for (i = 0; i < count; i++) {
1311                 if (entries[i].fd != fsp->fh->fd )
1312                         break;
1313
1314                 dump_entry(&entries[i]);
1315         }
1316
1317         if (i == count) {
1318                 /* All locks are ours. */
1319                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n", 
1320                         fsp->fsp_name, (unsigned int)count ));
1321                 SAFE_FREE(entries);
1322                 delete_posix_lock_entries(fsp);
1323                 return;
1324         }
1325
1326         /*
1327          * Difficult case. We need to delete all our locks, whilst leaving
1328          * all other POSIX locks in place.
1329          */
1330
1331         for (i = 0; i < count; i++) {
1332                 struct posix_lock *pl = &entries[i];
1333                 if (pl->fd == fsp->fh->fd)
1334                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1335         }
1336         SAFE_FREE(entries);
1337 }
1338
1339 /*******************************************************************
1340  Create the in-memory POSIX lock databases.
1341 ********************************************************************/
1342
1343 BOOL posix_locking_init(int read_only)
1344 {
1345         if (posix_lock_tdb && posix_pending_close_tdb)
1346                 return True;
1347         
1348         if (!posix_lock_tdb)
1349                 posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1350                                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1351         if (!posix_lock_tdb) {
1352                 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1353                 return False;
1354         }
1355         if (!posix_pending_close_tdb)
1356                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1357                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1358         if (!posix_pending_close_tdb) {
1359                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1360                 return False;
1361         }
1362
1363         return True;
1364 }
1365
1366 /*******************************************************************
1367  Delete the in-memory POSIX lock databases.
1368 ********************************************************************/
1369
1370 BOOL posix_locking_end(void)
1371 {
1372     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1373                 return False;
1374     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1375                 return False;
1376         return True;
1377 }