r15018: Merge Volker's ipc/trans2/nttrans changes over
[jra/samba/.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2000
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20    Revision History:
21
22    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
23 */
24
25 #include "includes.h"
26
27 #undef DBGC_CLASS
28 #define DBGC_CLASS DBGC_LOCKING
29
30 /*
31  * The POSIX locking database handle.
32  */
33
34 static TDB_CONTEXT *posix_lock_tdb; /* Keyed by locking_key(); each record is an array of struct posix_lock. */
35
36 /*
37  * The pending close database handle.
38  */
39
40 static TDB_CONTEXT *posix_pending_close_tdb; /* Keyed by locking_key(); each record is an array of int fds awaiting close. */
41
42 /*
43  * The data in POSIX lock records is an unsorted linear array of these
44  * records.  It is unnecessary to store the count as tdb provides the
45  * size of the record.
46  */
47
48 struct posix_lock { /* One element of the record array stored in posix_lock_tdb. */
49         int fd; /* fsp->fh->fd of the handle that added this lock. */
50         SMB_OFF_T start; /* First byte of the locked range. */
51         SMB_OFF_T size; /* Length of the locked range in bytes. */
52         int lock_type; /* Compared against F_RDLCK when printed; presumably F_RDLCK or F_WRLCK. */
53 };
54
55 /*
56  * The data in POSIX pending close records is an unsorted linear array of int
57  * records.  It is unnecessary to store the count as tdb provides the
58  * size of the record.
59  */
60
61 /* The key used in both the POSIX databases. */
62
63 struct posix_lock_key { /* Raw bytes of this struct form the tdb key; see locking_key(). */
64         SMB_DEV_T device; /* Filled from fsp->dev. */
65         SMB_INO_T inode; /* Filled from fsp->inode. */
66 }; 
67
68 /*******************************************************************
69  Form a static locking key for a dev/inode pair.
70 ******************************************************************/
71
72 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
73 {
74         static struct posix_lock_key key;
75         TDB_DATA kbuf;
76
77         memset(&key, '\0', sizeof(key));
78         key.device = dev;
79         key.inode = inode;
80         kbuf.dptr = (char *)&key;
81         kbuf.dsize = sizeof(key);
82         return kbuf;
83 }
84
85 /*******************************************************************
86  Convenience function to get a key from an fsp.
87 ******************************************************************/
88
89 static TDB_DATA locking_key_fsp(files_struct *fsp)
90 {
91         return locking_key(fsp->dev, fsp->inode);
92 }
93
94 /****************************************************************************
95  Add an fd to the pending close tdb.
96 ****************************************************************************/
97
98 static BOOL add_fd_to_close_entry(files_struct *fsp)
99 {
100         TDB_DATA kbuf = locking_key_fsp(fsp);
101         TDB_DATA dbuf;
102
103         dbuf.dptr = NULL;
104         dbuf.dsize = 0;
105
106         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
107
108         dbuf.dptr = SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(int));
109         if (!dbuf.dptr) {
110                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
111                 return False;
112         }
113
114         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fh->fd, sizeof(int));
115         dbuf.dsize += sizeof(int);
116
117         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
118                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
119         }
120
121         SAFE_FREE(dbuf.dptr);
122         return True;
123 }
124
125 /****************************************************************************
126  Remove all fd entries for a specific dev/inode pair from the tdb.
127 ****************************************************************************/
128
129 static void delete_close_entries(files_struct *fsp)
130 {
131         TDB_DATA kbuf = locking_key_fsp(fsp);
132
133         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
134                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
135 }
136
137 /****************************************************************************
138  Get the array of POSIX pending close records for an open fsp. Caller must
139  free. Returns number of entries.
140 ****************************************************************************/
141
142 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
143 {
144         TDB_DATA kbuf = locking_key_fsp(fsp);
145         TDB_DATA dbuf;
146         size_t count = 0;
147
148         *entries = NULL;
149         dbuf.dptr = NULL;
150
151         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
152
153         if (!dbuf.dptr) {
154                 return 0;
155         }
156
157         *entries = (int *)dbuf.dptr;
158         count = (size_t)(dbuf.dsize / sizeof(int));
159
160         return count;
161 }
162
163 /****************************************************************************
164  Get the array of POSIX locks for an fsp. Caller must free. Returns
165  number of entries.
166 ****************************************************************************/
167
168 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
169 {
170         TDB_DATA kbuf = locking_key_fsp(fsp);
171         TDB_DATA dbuf;
172         size_t count = 0;
173
174         *entries = NULL;
175
176         dbuf.dptr = NULL;
177
178         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
179
180         if (!dbuf.dptr) {
181                 return 0;
182         }
183
184         *entries = (struct posix_lock *)dbuf.dptr;
185         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
186
187         return count;
188 }
189
190 /****************************************************************************
191  Deal with pending closes needed by POSIX locking support.
192  Note that posix_locking_close_file() is expected to have been called
193  to delete all locks on this fsp before this function is called.
194 ****************************************************************************/
195
196 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
197 {
198         int saved_errno = 0;
199         int ret;
200         size_t count, i;
201         struct posix_lock *entries = NULL;
202         int *fd_array = NULL;
203         BOOL locks_on_other_fds = False;
204
205         if (!lp_posix_locking(SNUM(conn))) {
206                 /*
207                  * No POSIX to worry about, just close.
208                  */
209                 ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
210                 fsp->fh->fd = -1;
211                 return ret;
212         }
213
214         /*
215          * Get the number of outstanding POSIX locks on this dev/inode pair.
216          */
217
218         count = get_posix_lock_entries(fsp, &entries);
219
220         /*
221          * Check if there are any outstanding locks belonging to
222          * other fd's. This should never be the case if posix_locking_close_file()
223          * has been called first, but it never hurts to be *sure*.
224          */
225
226         for (i = 0; i < count; i++) {
227                 if (entries[i].fd != fsp->fh->fd) {
228                         locks_on_other_fds = True;
229                         break;
230                 }
231         }
232
233         if (locks_on_other_fds) {
234
235                 /*
236                  * There are outstanding locks on this dev/inode pair on other fds.
237                  * Add our fd to the pending close tdb and set fsp->fh->fd to -1.
238                  */
239
240                 if (!add_fd_to_close_entry(fsp)) {
241                         SAFE_FREE(entries);
242                         return -1;
243                 }
244
245                 SAFE_FREE(entries);
246                 fsp->fh->fd = -1;
247                 return 0;
248         }
249
250         SAFE_FREE(entries);
251
252         /*
253          * No outstanding POSIX locks. Get the pending close fd's
254          * from the tdb and close them all.
255          */
256
257         count = get_posix_pending_close_entries(fsp, &fd_array);
258
259         if (count) {
260                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
261
262                 for(i = 0; i < count; i++) {
263                         if (SMB_VFS_CLOSE(fsp,fd_array[i]) == -1) {
264                                 saved_errno = errno;
265                         }
266                 }
267
268                 /*
269                  * Delete all fd's stored in the tdb
270                  * for this dev/inode pair.
271                  */
272
273                 delete_close_entries(fsp);
274         }
275
276         SAFE_FREE(fd_array);
277
278         /*
279          * Finally close the fd associated with this fsp.
280          */
281
282         ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
283
284         if (saved_errno != 0) {
285                 errno = saved_errno;
286                 ret = -1;
287         } 
288
289         fsp->fh->fd = -1;
290
291         return ret;
292 }
293
/****************************************************************************
 Debugging aid: printable name for a POSIX lock type.
 F_RDLCK maps to "READ"; every other value maps to "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
        if (lock_type == F_RDLCK) {
                return "READ";
        }

        return "WRITE";
}
302
303 /****************************************************************************
304  Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
305  then the POSIX fcntl lock fails.
306 ****************************************************************************/
307
308 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
309 {
310         TDB_DATA kbuf = locking_key_fsp(fsp);
311         TDB_DATA dbuf;
312         struct posix_lock *locks;
313         size_t count;
314
315         dbuf.dptr = NULL;
316         
317         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
318
319         if (!dbuf.dptr) {
320                 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
321                 goto fail;
322         }
323
324         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
325         locks = (struct posix_lock *)dbuf.dptr;
326
327         if (count == 1) {
328                 tdb_delete(posix_lock_tdb, kbuf);
329         } else {
330                 if (entry < count-1) {
331                         memmove(&locks[entry], &locks[entry+1], sizeof(struct posix_lock)*((count-1) - entry));
332                 }
333                 dbuf.dsize -= sizeof(struct posix_lock);
334                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
335         }
336
337         SAFE_FREE(dbuf.dptr);
338
339         return True;
340
341  fail:
342
343         SAFE_FREE(dbuf.dptr);
344         return False;
345 }
346
347 /****************************************************************************
348  Add an entry into the POSIX locking tdb. We return the index number of the
349  added lock (used in case we need to delete *exactly* this entry). Returns
350  False on fail, True on success.
351 ****************************************************************************/
352
353 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
354 {
355         TDB_DATA kbuf = locking_key_fsp(fsp);
356         TDB_DATA dbuf;
357         struct posix_lock pl;
358
359         dbuf.dptr = NULL;
360         dbuf.dsize = 0;
361
362         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
363
364         *pentry_num = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
365
366         /*
367          * Add new record.
368          */
369
370         pl.fd = fsp->fh->fd;
371         pl.start = start;
372         pl.size = size;
373         pl.lock_type = lock_type;
374
375         dbuf.dptr = SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(struct posix_lock));
376         if (!dbuf.dptr) {
377                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
378                 goto fail;
379         }
380
381         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(struct posix_lock));
382         dbuf.dsize += sizeof(struct posix_lock);
383
384         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
385                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
386                 goto fail;
387         }
388
389         SAFE_FREE(dbuf.dptr);
390
391         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
392                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
393                         (double)fsp->dev, (double)fsp->inode ));
394
395         return True;
396
397  fail:
398
399         SAFE_FREE(dbuf.dptr);
400         return False;
401 }
402
403 /****************************************************************************
404  Calculate if locks have any overlap at all.
405 ****************************************************************************/
406
407 static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
408 {
409         if (start1 >= start2 && start1 <= start2 + size2)
410                 return True;
411
412         if (start1 < start2 && start1 + size1 > start2)
413                 return True;
414
415         return False;
416 }
417
418 /****************************************************************************
419  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
420  deleted and the number of records that are overlapped by this one, or -1 on error.
421 ****************************************************************************/
422
423 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
424 {
425         TDB_DATA kbuf = locking_key_fsp(fsp);
426         TDB_DATA dbuf;
427         struct posix_lock *locks;
428         size_t i, count;
429         BOOL found = False;
430         int num_overlapping_records = 0;
431
432         dbuf.dptr = NULL;
433         
434         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
435
436         if (!dbuf.dptr) {
437                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
438                 goto fail;
439         }
440
441         /* There are existing locks - find a match. */
442         locks = (struct posix_lock *)dbuf.dptr;
443         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
444
445         /*
446          * Search for and delete the first record that matches the
447          * unlock criteria.
448          */
449
450         for (i=0; i<count; i++) { 
451                 struct posix_lock *entry = &locks[i];
452
453                 if (entry->fd == fsp->fh->fd &&
454                         entry->start == start &&
455                         entry->size == size) {
456
457                         /* Make a copy if requested. */
458                         if (pl)
459                                 *pl = *entry;
460
461                         /* Found it - delete it. */
462                         if (count == 1) {
463                                 tdb_delete(posix_lock_tdb, kbuf);
464                         } else {
465                                 if (i < count-1) {
466                                         memmove(&locks[i], &locks[i+1], sizeof(struct posix_lock)*((count-1) - i));
467                                 }
468                                 dbuf.dsize -= sizeof(struct posix_lock);
469                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
470                         }
471                         count--;
472                         found = True;
473                         break;
474                 }
475         }
476
477         if (!found)
478                 goto fail;
479
480         /*
481          * Count the number of entries that are
482          * overlapped by this unlock request.
483          */
484
485         for (i = 0; i < count; i++) {
486                 struct posix_lock *entry = &locks[i];
487
488                 if (fsp->fh->fd == entry->fd &&
489                         does_lock_overlap( start, size, entry->start, entry->size))
490                                 num_overlapping_records++;
491         }
492
493         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
494                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
495                                 (unsigned int)num_overlapping_records ));
496
497         SAFE_FREE(dbuf.dptr);
498
499         return num_overlapping_records;
500
501  fail:
502
503         SAFE_FREE(dbuf.dptr);
504         return -1;
505 }
506
507 /****************************************************************************
508  Utility function to map a lock type correctly depending on the open
509  mode of a file.
510 ****************************************************************************/
511
512 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
513 {
514         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
515                 /*
516                  * Many UNIX's cannot get a write lock on a file opened read-only.
517                  * Win32 locking semantics allow this.
518                  * Do the best we can and attempt a read-only lock.
519                  */
520                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
521                 return F_RDLCK;
522         }
523 #if 0
524         /* We no longer open files write-only. */
525          else if((lock_type == READ_LOCK) && !fsp->can_read) {
526                 /*
527                  * Ditto for read locks on write only files.
528                  */
529                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
530                 return F_WRLCK;
531         }
532 #endif
533
534         /*
535          * This return should be the most normal, as we attempt
536          * to always open files read/write.
537          */
538
539         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
540 }
541
542 /****************************************************************************
543  Check to see if the given unsigned lock range is within the possible POSIX
544  range. Modifies the given args to be in range if possible, just returns
545  False if not.
546 ****************************************************************************/
547
548 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
549                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
550 {
551         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
552         SMB_OFF_T count = (SMB_OFF_T)u_count;
553
554         /*
555          * For the type of system we are, attempt to
556          * find the maximum positive lock offset as an SMB_OFF_T.
557          */
558
559 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
560
561         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
562
563 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
564
565         /*
566          * In this case SMB_OFF_T is 64 bits,
567          * and the underlying system can handle 64 bit signed locks.
568          */
569
570         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
571         SMB_OFF_T mask = (mask2<<1);
572         SMB_OFF_T max_positive_lock_offset = ~mask;
573
574 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
575
576         /*
577          * In this case either SMB_OFF_T is 32 bits,
578          * or the underlying system cannot handle 64 bit signed locks.
579          * All offsets & counts must be 2^31 or less.
580          */
581
582         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
583
584 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
585
586         /*
587          * POSIX locks of length zero mean lock to end-of-file.
588          * Win32 locks of length zero are point probes. Ignore
589          * any Win32 locks of length zero. JRA.
590          */
591
592         if (count == (SMB_OFF_T)0) {
593                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
594                 return False;
595         }
596
597         /*
598          * If the given offset was > max_positive_lock_offset then we cannot map this at all
599          * ignore this lock.
600          */
601
602         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
603                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
604                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
605                 return False;
606         }
607
608         /*
609          * We must truncate the count to less than max_positive_lock_offset.
610          */
611
612         if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset))
613                 count = max_positive_lock_offset;
614
615         /*
616          * Truncate count to end at max lock offset.
617          */
618
619         if (offset + count < 0 || offset + count > max_positive_lock_offset)
620                 count = max_positive_lock_offset - offset;
621
622         /*
623          * If we ate all the count, ignore this lock.
624          */
625
626         if (count == 0) {
627                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
628                                 (double)u_offset, (double)u_count ));
629                 return False;
630         }
631
632         /*
633          * The mapping was successful.
634          */
635
636         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
637                         (double)offset, (double)count ));
638
639         *offset_out = offset;
640         *count_out = count;
641         
642         return True;
643 }
644
645 /****************************************************************************
646  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
647  broken NFS implementations.
648 ****************************************************************************/
649
650 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
651 {
652         BOOL ret;
653
654         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
655
656         ret = SMB_VFS_LOCK(fsp,fsp->fh->fd,op,offset,count,type);
657
658         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
659
660                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
661                                         (double)offset,(double)count));
662                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
663                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
664
665                 /*
666                  * If the offset is > 0x7FFFFFFF then this will cause problems on
667                  * 32 bit NFS mounted filesystems. Just ignore it.
668                  */
669
670                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
671                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
672                         return True;
673                 }
674
675                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
676                         /* 32 bit NFS file system, retry with smaller offset */
677                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
678                         errno = 0;
679                         count &= 0x7fffffff;
680                         ret = SMB_VFS_LOCK(fsp,fsp->fh->fd,op,offset,count,type);
681                 }
682         }
683
684         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
685         return ret;
686 }
687
688 /****************************************************************************
689  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
690  broken NFS implementations.
691 ****************************************************************************/
692
693 static BOOL posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
694 {
695         pid_t pid;
696         BOOL ret;
697
698         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
699                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
700
701         ret = SMB_VFS_GETLOCK(fsp,fsp->fh->fd,poffset,pcount,ptype,&pid);
702
703         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
704
705                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
706                                         (double)*poffset,(double)*pcount));
707                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
708                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
709
710                 /*
711                  * If the offset is > 0x7FFFFFFF then this will cause problems on
712                  * 32 bit NFS mounted filesystems. Just ignore it.
713                  */
714
715                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
716                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
717                         return True;
718                 }
719
720                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
721                         /* 32 bit NFS file system, retry with smaller offset */
722                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
723                         errno = 0;
724                         *pcount &= 0x7fffffff;
725                         ret = SMB_VFS_GETLOCK(fsp,fsp->fh->fd,poffset,pcount,ptype,&pid);
726                 }
727         }
728
729         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
730         return ret;
731 }
732
733
734 /****************************************************************************
735  POSIX function to see if a file region is locked. Returns True if the
736  region is locked, False otherwise.
737 ****************************************************************************/
738
739 BOOL is_posix_locked(files_struct *fsp,
740                         SMB_BIG_UINT *pu_offset,
741                         SMB_BIG_UINT *pu_count,
742                         enum brl_type *plock_type,
743                         enum brl_flavour lock_flav)
744 {
745         SMB_OFF_T offset;
746         SMB_OFF_T count;
747         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
748
749         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
750                 fsp->fsp_name, (double)*pu_offset, (double)*pu_count, posix_lock_type_name(*plock_type) ));
751
752         /*
753          * If the requested lock won't fit in the POSIX range, we will
754          * never set it, so presume it is not locked.
755          */
756
757         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
758                 return False;
759         }
760
761         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
762                 return False;
763         }
764
765         if (posix_lock_type == F_UNLCK) {
766                 return False;
767         }
768
769         if (lock_flav == POSIX_LOCK) {
770                 /* Only POSIX lock queries need to know the details. */
771                 *pu_offset = (SMB_BIG_UINT)offset;
772                 *pu_count = (SMB_BIG_UINT)count;
773                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
774         }
775         return True;
776 }
777
778 /*
779  * Structure used when splitting a lock range
780  * into a POSIX lock range. Doubly linked list.
781  */
782
783 struct lock_list { /* One candidate byte range in the list walked by posix_lock_list(). */
784         struct lock_list *next; /* Following range, NULL at the tail. */
785         struct lock_list *prev; /* Preceding range; maintained by the DLIST macros. */
786         SMB_OFF_T start; /* First byte of the range. */
787         SMB_OFF_T size; /* Length of the range in bytes. */
788 };
789
790 /****************************************************************************
791  Create a list of lock ranges that don't overlap a given range. Used in calculating
792  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
793  understand it :-).
794 ****************************************************************************/
795
796 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
797 {
798         TDB_DATA kbuf = locking_key_fsp(fsp);
799         TDB_DATA dbuf;
800         struct posix_lock *locks;
801         size_t num_locks, i;
802
803         dbuf.dptr = NULL;
804
805         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
806
807         if (!dbuf.dptr)
808                 return lhead;
809         
810         locks = (struct posix_lock *)dbuf.dptr;
811         num_locks = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
812
813         /*
814          * Check the current lock list on this dev/inode pair.
815          * Quit if the list is deleted.
816          */
817
818         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
819                 (double)lhead->start, (double)lhead->size ));
820
821         for (i=0; i<num_locks && lhead; i++) {
822
823                 struct posix_lock *lock = &locks[i];
824                 struct lock_list *l_curr;
825
826                 /*
827                  * Walk the lock list, checking for overlaps. Note that
828                  * the lock list can expand within this loop if the current
829                  * range being examined needs to be split.
830                  */
831
832                 for (l_curr = lhead; l_curr;) {
833
834                         DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
835                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
836
837                         if ( (l_curr->start >= (lock->start + lock->size)) ||
838                                  (lock->start >= (l_curr->start + l_curr->size))) {
839
840                                 /* No overlap with this lock - leave this range alone. */
841 /*********************************************
842                                              +---------+
843                                              | l_curr  |
844                                              +---------+
845                                 +-------+
846                                 | lock  |
847                                 +-------+
848 OR....
849              +---------+
850              |  l_curr |
851              +---------+
852 **********************************************/
853
854                                 DEBUG(10,("no overlap case.\n" ));
855
856                                 l_curr = l_curr->next;
857
858                         } else if ( (l_curr->start >= lock->start) &&
859                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
860
861                                 /*
862                                  * This unlock is completely overlapped by this existing lock range
863                                  * and thus should have no effect (not be unlocked). Delete it from the list.
864                                  */
865 /*********************************************
866                 +---------+
867                 |  l_curr |
868                 +---------+
869         +---------------------------+
870         |       lock                |
871         +---------------------------+
872 **********************************************/
873                                 /* Save the next pointer */
874                                 struct lock_list *ul_next = l_curr->next;
875
876                                 DEBUG(10,("delete case.\n" ));
877
878                                 DLIST_REMOVE(lhead, l_curr);
879                                 if(lhead == NULL)
880                                         break; /* No more list... */
881
882                                 l_curr = ul_next;
883                                 
884                         } else if ( (l_curr->start >= lock->start) &&
885                                                 (l_curr->start < lock->start + lock->size) &&
886                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
887
888                                 /*
889                                  * This unlock overlaps the existing lock range at the high end.
890                                  * Truncate by moving start to existing range end and reducing size.
891                                  */
892 /*********************************************
893                 +---------------+
894                 |  l_curr       |
895                 +---------------+
896         +---------------+
897         |    lock       |
898         +---------------+
899 BECOMES....
900                         +-------+
901                         | l_curr|
902                         +-------+
903 **********************************************/
904
905                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
906                                 l_curr->start = lock->start + lock->size;
907
908                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
909                                                                 (double)l_curr->start, (double)l_curr->size ));
910
911                                 l_curr = l_curr->next;
912
913                         } else if ( (l_curr->start < lock->start) &&
914                                                 (l_curr->start + l_curr->size > lock->start) &&
915                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
916
917                                 /*
918                                  * This unlock overlaps the existing lock range at the low end.
919                                  * Truncate by reducing size.
920                                  */
921 /*********************************************
922    +---------------+
923    |  l_curr       |
924    +---------------+
925            +---------------+
926            |    lock       |
927            +---------------+
928 BECOMES....
929    +-------+
930    | l_curr|
931    +-------+
932 **********************************************/
933
934                                 l_curr->size = lock->start - l_curr->start;
935
936                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
937                                                                 (double)l_curr->start, (double)l_curr->size ));
938
939                                 l_curr = l_curr->next;
940                 
941                         } else if ( (l_curr->start < lock->start) &&
942                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
943                                 /*
944                                  * Worst case scenario. Unlock request completely overlaps an existing
945                                  * lock range. Split the request into two, push the new (upper) request
946                                  * into the dlink list, and continue with the entry after ul_new (as we
947                                  * know that ul_new will not overlap with this lock).
948                                  */
949 /*********************************************
950         +---------------------------+
951         |        l_curr             |
952         +---------------------------+
953                 +---------+
954                 | lock    |
955                 +---------+
956 BECOMES.....
957         +-------+         +---------+
958         | l_curr|         | l_new   |
959         +-------+         +---------+
960 **********************************************/
961                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
962
963                                 if(l_new == NULL) {
964                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
965                                         return NULL; /* The talloc_destroy takes care of cleanup. */
966                                 }
967
968                                 ZERO_STRUCTP(l_new);
969                                 l_new->start = lock->start + lock->size;
970                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
971
972                                 /* Truncate the l_curr. */
973                                 l_curr->size = lock->start - l_curr->start;
974
975                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
976 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
977                                                                 (double)l_new->start, (double)l_new->size ));
978
979                                 /*
980                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
981                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
982                                  */
983
984                                 l_new->prev = l_curr;
985                                 l_new->next = l_curr->next;
986                                 l_curr->next = l_new;
987
988                                 /* And move after the link we added. */
989                                 l_curr = l_new->next;
990
991                         } else {
992
993                                 /*
994                                  * This logic case should never happen. Ensure this is the
995                                  * case by forcing an abort.... Remove in production.
996                                  */
997                                 pstring msg;
998
999                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
1000 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
1001
1002                                 smb_panic(msg);
1003                         }
1004                 } /* end for ( l_curr = lhead; l_curr;) */
1005         } /* end for (i=0; i<num_locks && ul_head; i++) */
1006
1007         SAFE_FREE(dbuf.dptr);
1008         
1009         return lhead;
1010 }
1011
1012 /****************************************************************************
1013  POSIX function to acquire a lock. Returns True if the
1014  lock could be granted, False if not.
1015  TODO -- Fix POSIX lock flavour semantics.
1016 ****************************************************************************/
1017
1018 BOOL set_posix_lock(files_struct *fsp,
1019                         SMB_BIG_UINT u_offset,
1020                         SMB_BIG_UINT u_count,
1021                         enum brl_type lock_type,
1022                         enum brl_flavour lock_flav)
1023 {
1024         SMB_OFF_T offset;
1025         SMB_OFF_T count;
1026         BOOL ret = True;
1027         size_t entry_num = 0;
1028         size_t lock_count;
1029         TALLOC_CTX *l_ctx = NULL;
1030         struct lock_list *llist = NULL;
1031         struct lock_list *ll = NULL;
1032         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1033
1034         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
1035                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
1036
1037         /*
1038          * If the requested lock won't fit in the POSIX range, we will
1039          * pretend it was successful.
1040          */
1041
1042         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1043                 return True;
1044
1045         /*
1046          * Windows is very strange. It allows read locks to be overlayed
1047          * (even over a write lock), but leaves the write lock in force until the first
1048          * unlock. It also reference counts the locks. This means the following sequence :
1049          *
1050          * process1                                      process2
1051          * ------------------------------------------------------------------------
1052          * WRITE LOCK : start = 2, len = 10
1053          *                                            READ LOCK: start =0, len = 10 - FAIL
1054          * READ LOCK : start = 0, len = 14 
1055          *                                            READ LOCK: start =0, len = 10 - FAIL
1056          * UNLOCK : start = 2, len = 10
1057          *                                            READ LOCK: start =0, len = 10 - OK
1058          *
1059          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1060          * would leave a single read lock over the 0-14 region. In order to
1061          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
1062          * entries, one for each overlayed lock request. We are guarenteed by the brlock
1063          * semantics that if a write lock is added, then it will be first in the array.
1064          */
1065         
1066         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1067                 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1068                 return True; /* Not a fatal error. */
1069         }
1070
1071         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
1072                 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1073                 talloc_destroy(l_ctx);
1074                 return True; /* Not a fatal error. */
1075         }
1076
1077         /*
1078          * Create the initial list entry containing the
1079          * lock we want to add.
1080          */
1081
1082         ZERO_STRUCTP(ll);
1083         ll->start = offset;
1084         ll->size = count;
1085
1086         DLIST_ADD(llist, ll);
1087
1088         /*
1089          * The following call calculates if there are any
1090          * overlapping locks held by this process on
1091          * fd's open on the same file and splits this list
1092          * into a list of lock ranges that do not overlap with existing
1093          * POSIX locks.
1094          */
1095
1096         llist = posix_lock_list(l_ctx, llist, fsp);
1097
1098         /*
1099          * Now we have the list of ranges to lock it is safe to add the
1100          * entry into the POSIX lock tdb. We take note of the entry we
1101          * added here in case we have to remove it on POSIX lock fail.
1102          */
1103
1104         if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1105                 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1106                 talloc_destroy(l_ctx);
1107                 return False;
1108         }
1109
1110         /*
1111          * Add the POSIX locks on the list of ranges returned.
1112          * As the lock is supposed to be added atomically, we need to
1113          * back out all the locks if any one of these calls fail.
1114          */
1115
1116         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1117                 offset = ll->start;
1118                 count = ll->size;
1119
1120                 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1121                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1122
1123                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1124                         DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1125                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1126                         ret = False;
1127                         break;
1128                 }
1129         }
1130
1131         if (!ret) {
1132
1133                 /*
1134                  * Back out all the POSIX locks we have on fail.
1135                  */
1136
1137                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1138                         offset = ll->start;
1139                         count = ll->size;
1140
1141                         DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1142                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1143
1144                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1145                 }
1146
1147                 /*
1148                  * Remove the tdb entry for this lock.
1149                  */
1150
1151                 delete_posix_lock_entry_by_index(fsp,entry_num);
1152         }
1153
1154         talloc_destroy(l_ctx);
1155         return ret;
1156 }
1157
1158 /****************************************************************************
1159  POSIX function to release a lock. Returns True if the
1160  lock could be released, False if not.
1161 ****************************************************************************/
1162
1163 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1164 {
1165         SMB_OFF_T offset;
1166         SMB_OFF_T count;
1167         BOOL ret = True;
1168         TALLOC_CTX *ul_ctx = NULL;
1169         struct lock_list *ulist = NULL;
1170         struct lock_list *ul = NULL;
1171         struct posix_lock deleted_lock;
1172         int num_overlapped_entries;
1173
1174         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1175                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1176
1177         /*
1178          * If the requested lock won't fit in the POSIX range, we will
1179          * pretend it was successful.
1180          */
1181
1182         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1183                 return True;
1184
1185         /*
1186          * We treat this as one unlock request for POSIX accounting purposes even
1187          * if it may later be split into multiple smaller POSIX unlock ranges.
1188          * num_overlapped_entries is the number of existing locks that have any
1189          * overlap with this unlock request.
1190          */ 
1191
1192         num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1193
1194         if (num_overlapped_entries == -1) {
1195                 smb_panic("release_posix_lock: unable find entry to delete !\n");
1196         }
1197
1198         /*
1199          * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1200          * a POSIX write lock, then before doing the unlock we need to downgrade
1201          * the POSIX lock to a read lock. This allows any overlapping read locks
1202          * to be atomically maintained.
1203          */
1204
1205         if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1206                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1207                         DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1208                         return False;
1209                 }
1210         }
1211
1212         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1213                 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1214                 return True; /* Not a fatal error. */
1215         }
1216
1217         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1218                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1219                 talloc_destroy(ul_ctx);
1220                 return True; /* Not a fatal error. */
1221         }
1222
1223         /*
1224          * Create the initial list entry containing the
1225          * lock we want to remove.
1226          */
1227
1228         ZERO_STRUCTP(ul);
1229         ul->start = offset;
1230         ul->size = count;
1231
1232         DLIST_ADD(ulist, ul);
1233
1234         /*
1235          * The following call calculates if there are any
1236          * overlapping locks held by this process on
1237          * fd's open on the same file and creates a
1238          * list of unlock ranges that will allow
1239          * POSIX lock ranges to remain on the file whilst the
1240          * unlocks are performed.
1241          */
1242
1243         ulist = posix_lock_list(ul_ctx, ulist, fsp);
1244
1245         /*
1246          * Release the POSIX locks on the list of ranges returned.
1247          */
1248
1249         for(; ulist; ulist = ulist->next) {
1250                 offset = ulist->start;
1251                 count = ulist->size;
1252
1253                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1254                         (double)offset, (double)count ));
1255
1256                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1257                         ret = False;
1258         }
1259
1260         talloc_destroy(ul_ctx);
1261
1262         return ret;
1263 }
1264
1265 /****************************************************************************
1266  Remove all lock entries for a specific dev/inode pair from the tdb.
1267 ****************************************************************************/
1268
1269 static void delete_posix_lock_entries(files_struct *fsp)
1270 {
1271         TDB_DATA kbuf = locking_key_fsp(fsp);
1272
1273         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1274                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1275 }
1276
1277 /****************************************************************************
1278  Debug function.
1279 ****************************************************************************/
1280
1281 static void dump_entry(struct posix_lock *pl)
1282 {
1283         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1284                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1285 }
1286
1287 /****************************************************************************
1288  Remove any locks on this fd. Called from file_close().
1289 ****************************************************************************/
1290
1291 void posix_locking_close_file(files_struct *fsp)
1292 {
1293         struct posix_lock *entries = NULL;
1294         size_t count, i;
1295
1296         /*
1297          * Optimization for the common case where we are the only
1298          * opener of a file. If all fd entries are our own, we don't
1299          * need to explicitly release all the locks via the POSIX functions,
1300          * we can just remove all the entries in the tdb and allow the
1301          * close to remove the real locks.
1302          */
1303
1304         count = get_posix_lock_entries(fsp, &entries);
1305
1306         if (count == 0) {
1307                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1308                 return;
1309         }
1310
1311         for (i = 0; i < count; i++) {
1312                 if (entries[i].fd != fsp->fh->fd )
1313                         break;
1314
1315                 dump_entry(&entries[i]);
1316         }
1317
1318         if (i == count) {
1319                 /* All locks are ours. */
1320                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n", 
1321                         fsp->fsp_name, (unsigned int)count ));
1322                 SAFE_FREE(entries);
1323                 delete_posix_lock_entries(fsp);
1324                 return;
1325         }
1326
1327         /*
1328          * Difficult case. We need to delete all our locks, whilst leaving
1329          * all other POSIX locks in place.
1330          */
1331
1332         for (i = 0; i < count; i++) {
1333                 struct posix_lock *pl = &entries[i];
1334                 if (pl->fd == fsp->fh->fd)
1335                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1336         }
1337         SAFE_FREE(entries);
1338 }
1339
1340 /*******************************************************************
1341  Create the in-memory POSIX lock databases.
1342 ********************************************************************/
1343
1344 BOOL posix_locking_init(int read_only)
1345 {
1346         if (posix_lock_tdb && posix_pending_close_tdb)
1347                 return True;
1348         
1349         if (!posix_lock_tdb)
1350                 posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1351                                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1352         if (!posix_lock_tdb) {
1353                 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1354                 return False;
1355         }
1356         if (!posix_pending_close_tdb)
1357                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1358                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1359         if (!posix_pending_close_tdb) {
1360                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1361                 return False;
1362         }
1363
1364         return True;
1365 }
1366
1367 /*******************************************************************
1368  Delete the in-memory POSIX lock databases.
1369 ********************************************************************/
1370
1371 BOOL posix_locking_end(void)
1372 {
1373     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1374                 return False;
1375     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1376                 return False;
1377         return True;
1378 }