r7975: One more tidyup to ensure we're using "struct posix_lock".
[vlendec/samba-autobuild/.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2000
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20    Revision History:
21
22    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
23 */
24
25 #include "includes.h"
26
27 #undef DBGC_CLASS
28 #define DBGC_CLASS DBGC_LOCKING
29
30 /*
31  * The POSIX locking database handle: maps a dev/inode key to an array of struct posix_lock.
32  */
33
34 static TDB_CONTEXT *posix_lock_tdb;
35
36 /*
37  * The pending close database handle: maps a dev/inode key to an array of int fds that still need closing.
38  */
39
40 static TDB_CONTEXT *posix_pending_close_tdb;
41
42 /*
43  * The data in POSIX lock records is an unsorted linear array of these
44  * records.  It is unnecessary to store the count as tdb provides the
45  * size of the record (count = record size / sizeof(struct posix_lock)).
46  */
47
48 struct posix_lock {
49         int fd;                 /* fsp->fd of the open file that took the lock */
50         SMB_OFF_T start;        /* starting offset of the locked range */
51         SMB_OFF_T size;         /* length of the locked range */
52         int lock_type;          /* lock type as given to add_posix_lock_entry(); compared with F_RDLCK when printed */
53 };
54
55 /*
56  * The data in POSIX pending close records is an unsorted linear array of int
57  * records.  It is unnecessary to store the count as tdb provides the
58  * size of the record.
59  */
60
61 /* The key used in both the POSIX databases. locking_key() zeroes the whole struct before filling it in. */
62
63 struct posix_lock_key {
64         SMB_DEV_T device;       /* device number of the file */
65         SMB_INO_T inode;        /* inode number of the file */
66 }; 
67
68 /*******************************************************************
69  Form a static locking key for a dev/inode pair.
70 ******************************************************************/
71
72 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
73 {
74         static struct posix_lock_key key;
75         TDB_DATA kbuf;
76
77         memset(&key, '\0', sizeof(key));
78         key.device = dev;
79         key.inode = inode;
80         kbuf.dptr = (char *)&key;
81         kbuf.dsize = sizeof(key);
82         return kbuf;
83 }
84
85 /*******************************************************************
86  Convenience function to get a key from an fsp.
87 ******************************************************************/
88
89 static TDB_DATA locking_key_fsp(files_struct *fsp)
90 {
91         return locking_key(fsp->dev, fsp->inode);
92 }
93
94 /****************************************************************************
95  Add an fd to the pending close tdb.
96 ****************************************************************************/
97
98 static BOOL add_fd_to_close_entry(files_struct *fsp)
99 {
100         TDB_DATA kbuf = locking_key_fsp(fsp);
101         TDB_DATA dbuf;
102         char *tp;
103
104         dbuf.dptr = NULL;
105         dbuf.dsize = 0;
106
107         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
108
109         tp = SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(int));
110         if (!tp) {
111                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
112                 SAFE_FREE(dbuf.dptr);
113                 return False;
114         } else
115                 dbuf.dptr = tp;
116
117         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
118         dbuf.dsize += sizeof(int);
119
120         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
121                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
122         }
123
124         SAFE_FREE(dbuf.dptr);
125         return True;
126 }
127
128 /****************************************************************************
129  Remove all fd entries for a specific dev/inode pair from the tdb.
130 ****************************************************************************/
131
132 static void delete_close_entries(files_struct *fsp)
133 {
134         TDB_DATA kbuf = locking_key_fsp(fsp);
135
136         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
137                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
138 }
139
140 /****************************************************************************
141  Get the array of POSIX pending close records for an open fsp. Caller must
142  free. Returns number of entries.
143 ****************************************************************************/
144
145 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
146 {
147         TDB_DATA kbuf = locking_key_fsp(fsp);
148         TDB_DATA dbuf;
149         size_t count = 0;
150
151         *entries = NULL;
152         dbuf.dptr = NULL;
153
154         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
155
156         if (!dbuf.dptr) {
157                 return 0;
158         }
159
160         *entries = (int *)dbuf.dptr;
161         count = (size_t)(dbuf.dsize / sizeof(int));
162
163         return count;
164 }
165
166 /****************************************************************************
167  Get the array of POSIX locks for an fsp. Caller must free. Returns
168  number of entries.
169 ****************************************************************************/
170
171 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
172 {
173         TDB_DATA kbuf = locking_key_fsp(fsp);
174         TDB_DATA dbuf;
175         size_t count = 0;
176
177         *entries = NULL;
178
179         dbuf.dptr = NULL;
180
181         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
182
183         if (!dbuf.dptr) {
184                 return 0;
185         }
186
187         *entries = (struct posix_lock *)dbuf.dptr;
188         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
189
190         return count;
191 }
192
193 /****************************************************************************
194  Deal with pending closes needed by POSIX locking support.
195  Note that posix_locking_close_file() is expected to have been called
196  to delete all locks on this fsp before this function is called.
197 ****************************************************************************/
198
199 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
200 {
201         int saved_errno = 0;
202         int ret;
203         size_t count, i;
204         struct posix_lock *entries = NULL;
205         int *fd_array = NULL;
206         BOOL locks_on_other_fds = False;
207
208         if (!lp_posix_locking(SNUM(conn))) {
209                 /*
210                  * No POSIX to worry about, just close.
211                  */
212                 ret = SMB_VFS_CLOSE(fsp,fsp->fd);
213                 fsp->fd = -1;
214                 return ret;
215         }
216
217         /*
218          * Get the number of outstanding POSIX locks on this dev/inode pair.
219          */
220
221         count = get_posix_lock_entries(fsp, &entries);
222
223         /*
224          * Check if there are any outstanding locks belonging to
225          * other fd's. This should never be the case if posix_locking_close_file()
226          * has been called first, but it never hurts to be *sure*.
227          */
228
229         for (i = 0; i < count; i++) {
230                 if (entries[i].fd != fsp->fd) {
231                         locks_on_other_fds = True;
232                         break;
233                 }
234         }
235
236         if (locks_on_other_fds) {
237
238                 /*
239                  * There are outstanding locks on this dev/inode pair on other fds.
240                  * Add our fd to the pending close tdb and set fsp->fd to -1.
241                  */
242
243                 if (!add_fd_to_close_entry(fsp)) {
244                         SAFE_FREE(entries);
245                         return -1;
246                 }
247
248                 SAFE_FREE(entries);
249                 fsp->fd = -1;
250                 return 0;
251         }
252
253         SAFE_FREE(entries);
254
255         /*
256          * No outstanding POSIX locks. Get the pending close fd's
257          * from the tdb and close them all.
258          */
259
260         count = get_posix_pending_close_entries(fsp, &fd_array);
261
262         if (count) {
263                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
264
265                 for(i = 0; i < count; i++) {
266                         if (SMB_VFS_CLOSE(fsp,fd_array[i]) == -1) {
267                                 saved_errno = errno;
268                         }
269                 }
270
271                 /*
272                  * Delete all fd's stored in the tdb
273                  * for this dev/inode pair.
274                  */
275
276                 delete_close_entries(fsp);
277         }
278
279         SAFE_FREE(fd_array);
280
281         /*
282          * Finally close the fd associated with this fsp.
283          */
284
285         ret = SMB_VFS_CLOSE(fsp,fsp->fd);
286
287         if (saved_errno != 0) {
288                 errno = saved_errno;
289                 ret = -1;
290         } 
291
292         fsp->fd = -1;
293
294         return ret;
295 }
296
/****************************************************************************
 Debugging aid: printable name for a POSIX lock type. F_RDLCK is reported
 as "READ"; every other value is reported as "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type != F_RDLCK) {
		return "WRITE";
	}

	return "READ";
}
305
306 /****************************************************************************
307  Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
308  then the POSIX fcntl lock fails.
309 ****************************************************************************/
310
311 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
312 {
313         TDB_DATA kbuf = locking_key_fsp(fsp);
314         TDB_DATA dbuf;
315         struct posix_lock *locks;
316         size_t count;
317
318         dbuf.dptr = NULL;
319         
320         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
321
322         if (!dbuf.dptr) {
323                 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
324                 goto fail;
325         }
326
327         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
328         locks = (struct posix_lock *)dbuf.dptr;
329
330         if (count == 1) {
331                 tdb_delete(posix_lock_tdb, kbuf);
332         } else {
333                 if (entry < count-1) {
334                         memmove(&locks[entry], &locks[entry+1], sizeof(struct posix_lock)*((count-1) - entry));
335                 }
336                 dbuf.dsize -= sizeof(struct posix_lock);
337                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
338         }
339
340         SAFE_FREE(dbuf.dptr);
341
342         return True;
343
344  fail:
345
346         SAFE_FREE(dbuf.dptr);
347         return False;
348 }
349
350 /****************************************************************************
351  Add an entry into the POSIX locking tdb. We return the index number of the
352  added lock (used in case we need to delete *exactly* this entry). Returns
353  False on fail, True on success.
354 ****************************************************************************/
355
356 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
357 {
358         TDB_DATA kbuf = locking_key_fsp(fsp);
359         TDB_DATA dbuf;
360         struct posix_lock pl;
361         char *tp;
362
363         dbuf.dptr = NULL;
364         dbuf.dsize = 0;
365
366         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
367
368         *pentry_num = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
369
370         /*
371          * Add new record.
372          */
373
374         pl.fd = fsp->fd;
375         pl.start = start;
376         pl.size = size;
377         pl.lock_type = lock_type;
378
379         tp = SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(struct posix_lock));
380         if (!tp) {
381                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
382                 goto fail;
383         } else
384                 dbuf.dptr = tp;
385
386         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(struct posix_lock));
387         dbuf.dsize += sizeof(struct posix_lock);
388
389         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
390                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
391                 goto fail;
392         }
393
394         SAFE_FREE(dbuf.dptr);
395
396         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
397                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
398                         (double)fsp->dev, (double)fsp->inode ));
399
400         return True;
401
402  fail:
403
404         SAFE_FREE(dbuf.dptr);
405         return False;
406 }
407
408 /****************************************************************************
409  Calculate if locks have any overlap at all.
410 ****************************************************************************/
411
412 static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
413 {
414         if (start1 >= start2 && start1 <= start2 + size2)
415                 return True;
416
417         if (start1 < start2 && start1 + size1 > start2)
418                 return True;
419
420         return False;
421 }
422
423 /****************************************************************************
424  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
425  deleted and the number of records that are overlapped by this one, or -1 on error.
426 ****************************************************************************/
427
428 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
429 {
430         TDB_DATA kbuf = locking_key_fsp(fsp);
431         TDB_DATA dbuf;
432         struct posix_lock *locks;
433         size_t i, count;
434         BOOL found = False;
435         int num_overlapping_records = 0;
436
437         dbuf.dptr = NULL;
438         
439         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
440
441         if (!dbuf.dptr) {
442                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
443                 goto fail;
444         }
445
446         /* There are existing locks - find a match. */
447         locks = (struct posix_lock *)dbuf.dptr;
448         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
449
450         /*
451          * Search for and delete the first record that matches the
452          * unlock criteria.
453          */
454
455         for (i=0; i<count; i++) { 
456                 struct posix_lock *entry = &locks[i];
457
458                 if (entry->fd == fsp->fd &&
459                         entry->start == start &&
460                         entry->size == size) {
461
462                         /* Make a copy if requested. */
463                         if (pl)
464                                 *pl = *entry;
465
466                         /* Found it - delete it. */
467                         if (count == 1) {
468                                 tdb_delete(posix_lock_tdb, kbuf);
469                         } else {
470                                 if (i < count-1) {
471                                         memmove(&locks[i], &locks[i+1], sizeof(struct posix_lock)*((count-1) - i));
472                                 }
473                                 dbuf.dsize -= sizeof(struct posix_lock);
474                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
475                         }
476                         count--;
477                         found = True;
478                         break;
479                 }
480         }
481
482         if (!found)
483                 goto fail;
484
485         /*
486          * Count the number of entries that are
487          * overlapped by this unlock request.
488          */
489
490         for (i = 0; i < count; i++) {
491                 struct posix_lock *entry = &locks[i];
492
493                 if (fsp->fd == entry->fd &&
494                         does_lock_overlap( start, size, entry->start, entry->size))
495                                 num_overlapping_records++;
496         }
497
498         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
499                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
500                                 (unsigned int)num_overlapping_records ));
501
502         SAFE_FREE(dbuf.dptr);
503
504         return num_overlapping_records;
505
506  fail:
507
508         SAFE_FREE(dbuf.dptr);
509         return -1;
510 }
511
512 /****************************************************************************
513  Utility function to map a lock type correctly depending on the open
514  mode of a file.
515 ****************************************************************************/
516
517 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
518 {
519         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
520                 /*
521                  * Many UNIX's cannot get a write lock on a file opened read-only.
522                  * Win32 locking semantics allow this.
523                  * Do the best we can and attempt a read-only lock.
524                  */
525                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
526                 return F_RDLCK;
527         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
528                 /*
529                  * Ditto for read locks on write only files.
530                  */
531                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
532                 return F_WRLCK;
533         }
534
535         /*
536          * This return should be the most normal, as we attempt
537          * to always open files read/write.
538          */
539
540         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
541 }
542
543 /****************************************************************************
544  Check to see if the given unsigned lock range is within the possible POSIX
545  range. Modifies the given args to be in range if possible, just returns
546  False if not.
547 ****************************************************************************/
548
549 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
550                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
551 {
552         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
553         SMB_OFF_T count = (SMB_OFF_T)u_count;
554
555         /*
556          * For the type of system we are, attempt to
557          * find the maximum positive lock offset as an SMB_OFF_T.
558          */
559
560 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
561
562         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
563
564 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
565
566         /*
567          * In this case SMB_OFF_T is 64 bits,
568          * and the underlying system can handle 64 bit signed locks.
569          */
570
571         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
572         SMB_OFF_T mask = (mask2<<1);
573         SMB_OFF_T max_positive_lock_offset = ~mask;
574
575 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
576
577         /*
578          * In this case either SMB_OFF_T is 32 bits,
579          * or the underlying system cannot handle 64 bit signed locks.
580          * All offsets & counts must be 2^31 or less.
581          */
582
583         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
584
585 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
586
587         /*
588          * POSIX locks of length zero mean lock to end-of-file.
589          * Win32 locks of length zero are point probes. Ignore
590          * any Win32 locks of length zero. JRA.
591          */
592
593         if (count == (SMB_OFF_T)0) {
594                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
595                 return False;
596         }
597
598         /*
599          * If the given offset was > max_positive_lock_offset then we cannot map this at all
600          * ignore this lock.
601          */
602
603         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
604                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
605                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
606                 return False;
607         }
608
609         /*
610          * We must truncate the count to less than max_positive_lock_offset.
611          */
612
613         if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset))
614                 count = max_positive_lock_offset;
615
616         /*
617          * Truncate count to end at max lock offset.
618          */
619
620         if (offset + count < 0 || offset + count > max_positive_lock_offset)
621                 count = max_positive_lock_offset - offset;
622
623         /*
624          * If we ate all the count, ignore this lock.
625          */
626
627         if (count == 0) {
628                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
629                                 (double)u_offset, (double)u_count ));
630                 return False;
631         }
632
633         /*
634          * The mapping was successful.
635          */
636
637         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
638                         (double)offset, (double)count ));
639
640         *offset_out = offset;
641         *count_out = count;
642         
643         return True;
644 }
645
646 /****************************************************************************
647  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
648  broken NFS implementations.
649 ****************************************************************************/
650
651 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
652 {
653         int ret;
654
655         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
656
657         ret = SMB_VFS_LOCK(fsp,fsp->fd,op,offset,count,type);
658
659         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
660
661                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
662                                         (double)offset,(double)count));
663                 DEBUG(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
664                 DEBUG(0,("on 32 bit NFS mounted file systems.\n"));
665
666                 /*
667                  * If the offset is > 0x7FFFFFFF then this will cause problems on
668                  * 32 bit NFS mounted filesystems. Just ignore it.
669                  */
670
671                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
672                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
673                         return True;
674                 }
675
676                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
677                         /* 32 bit NFS file system, retry with smaller offset */
678                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
679                         errno = 0;
680                         count &= 0x7fffffff;
681                         ret = SMB_VFS_LOCK(fsp,fsp->fd,op,offset,count,type);
682                 }
683         }
684
685         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
686
687         return ret;
688 }
689
690 /****************************************************************************
691  POSIX function to see if a file region is locked. Returns True if the
692  region is locked, False otherwise.
693 ****************************************************************************/
694
695 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
696 {
697         SMB_OFF_T offset;
698         SMB_OFF_T count;
699         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
700
701         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
702                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
703
704         /*
705          * If the requested lock won't fit in the POSIX range, we will
706          * never set it, so presume it is not locked.
707          */
708
709         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
710                 return False;
711
712         /*
713          * Note that most UNIX's can *test* for a write lock on
714          * a read-only fd, just not *set* a write lock on a read-only
715          * fd. So we don't need to use map_lock_type here.
716          */ 
717
718         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
719 }
720
721 /*
722  * Structure used when splitting a lock range
723  * into a POSIX lock range. Doubly linked list; each element describes one range as start + size.
724  */
725
726 struct lock_list {
727         struct lock_list *next;  /* following element; list walks stop when this is NULL */
728         struct lock_list *prev;  /* preceding element */
729         SMB_OFF_T start;         /* starting offset of this range */
730         SMB_OFF_T size;          /* length of this range */
731 };
732
733 /****************************************************************************
734  Create a list of lock ranges that don't overlap a given range. Used in calculating
735  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
736  understand it :-).
737 ****************************************************************************/
738
739 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
740 {
741         TDB_DATA kbuf = locking_key_fsp(fsp);
742         TDB_DATA dbuf;
743         struct posix_lock *locks;
744         size_t num_locks, i;
745
746         dbuf.dptr = NULL;
747
748         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
749
750         if (!dbuf.dptr)
751                 return lhead;
752         
753         locks = (struct posix_lock *)dbuf.dptr;
754         num_locks = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
755
756         /*
757          * Check the current lock list on this dev/inode pair.
758          * Quit if the list is deleted.
759          */
760
761         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
762                 (double)lhead->start, (double)lhead->size ));
763
764         for (i=0; i<num_locks && lhead; i++) {
765
766                 struct posix_lock *lock = &locks[i];
767                 struct lock_list *l_curr;
768
769                 /*
770                  * Walk the lock list, checking for overlaps. Note that
771                  * the lock list can expand within this loop if the current
772                  * range being examined needs to be split.
773                  */
774
775                 for (l_curr = lhead; l_curr;) {
776
777                         DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
778                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
779
780                         if ( (l_curr->start >= (lock->start + lock->size)) ||
781                                  (lock->start >= (l_curr->start + l_curr->size))) {
782
783                                 /* No overlap with this lock - leave this range alone. */
784 /*********************************************
785                                              +---------+
786                                              | l_curr  |
787                                              +---------+
788                                 +-------+
789                                 | lock  |
790                                 +-------+
791 OR....
792              +---------+
793              |  l_curr |
794              +---------+
795 **********************************************/
796
797                                 DEBUG(10,("no overlap case.\n" ));
798
799                                 l_curr = l_curr->next;
800
801                         } else if ( (l_curr->start >= lock->start) &&
802                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
803
804                                 /*
805                                  * This unlock is completely overlapped by this existing lock range
806                                  * and thus should have no effect (not be unlocked). Delete it from the list.
807                                  */
808 /*********************************************
809                 +---------+
810                 |  l_curr |
811                 +---------+
812         +---------------------------+
813         |       lock                |
814         +---------------------------+
815 **********************************************/
816                                 /* Save the next pointer */
817                                 struct lock_list *ul_next = l_curr->next;
818
819                                 DEBUG(10,("delete case.\n" ));
820
821                                 DLIST_REMOVE(lhead, l_curr);
822                                 if(lhead == NULL)
823                                         break; /* No more list... */
824
825                                 l_curr = ul_next;
826                                 
827                         } else if ( (l_curr->start >= lock->start) &&
828                                                 (l_curr->start < lock->start + lock->size) &&
829                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
830
831                                 /*
832                                  * This unlock overlaps the existing lock range at the high end.
833                                  * Truncate by moving start to existing range end and reducing size.
834                                  */
835 /*********************************************
836                 +---------------+
837                 |  l_curr       |
838                 +---------------+
839         +---------------+
840         |    lock       |
841         +---------------+
842 BECOMES....
843                         +-------+
844                         | l_curr|
845                         +-------+
846 **********************************************/
847
848                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
849                                 l_curr->start = lock->start + lock->size;
850
851                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
852                                                                 (double)l_curr->start, (double)l_curr->size ));
853
854                                 l_curr = l_curr->next;
855
856                         } else if ( (l_curr->start < lock->start) &&
857                                                 (l_curr->start + l_curr->size > lock->start) &&
858                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
859
860                                 /*
861                                  * This unlock overlaps the existing lock range at the low end.
862                                  * Truncate by reducing size.
863                                  */
864 /*********************************************
865    +---------------+
866    |  l_curr       |
867    +---------------+
868            +---------------+
869            |    lock       |
870            +---------------+
871 BECOMES....
872    +-------+
873    | l_curr|
874    +-------+
875 **********************************************/
876
877                                 l_curr->size = lock->start - l_curr->start;
878
879                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
880                                                                 (double)l_curr->start, (double)l_curr->size ));
881
882                                 l_curr = l_curr->next;
883                 
884                         } else if ( (l_curr->start < lock->start) &&
885                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
886                                 /*
887                                  * Worst case scenario. Unlock request completely overlaps an existing
888                                  * lock range. Split the request into two, push the new (upper) request
889                                  * into the dlink list, and continue with the entry after ul_new (as we
890                                  * know that ul_new will not overlap with this lock).
891                                  */
892 /*********************************************
893         +---------------------------+
894         |        l_curr             |
895         +---------------------------+
896                 +---------+
897                 | lock    |
898                 +---------+
899 BECOMES.....
900         +-------+         +---------+
901         | l_curr|         | l_new   |
902         +-------+         +---------+
903 **********************************************/
904                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
905
906                                 if(l_new == NULL) {
907                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
908                                         return NULL; /* The talloc_destroy takes care of cleanup. */
909                                 }
910
911                                 ZERO_STRUCTP(l_new);
912                                 l_new->start = lock->start + lock->size;
913                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
914
915                                 /* Truncate the l_curr. */
916                                 l_curr->size = lock->start - l_curr->start;
917
918                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
919 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
920                                                                 (double)l_new->start, (double)l_new->size ));
921
922                                 /*
923                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
924                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
925                                  */
926
927                                 l_new->prev = l_curr;
928                                 l_new->next = l_curr->next;
929                                 l_curr->next = l_new;
930
931                                 /* And move after the link we added. */
932                                 l_curr = l_new->next;
933
934                         } else {
935
936                                 /*
937                                  * This logic case should never happen. Ensure this is the
938                                  * case by forcing an abort.... Remove in production.
939                                  */
940                                 pstring msg;
941
942                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
943 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
944
945                                 smb_panic(msg);
946                         }
947                 } /* end for ( l_curr = lhead; l_curr;) */
948         } /* end for (i=0; i<num_locks && ul_head; i++) */
949
950         SAFE_FREE(dbuf.dptr);
951         
952         return lhead;
953 }
954
955 /****************************************************************************
956  POSIX function to acquire a lock. Returns True if the
957  lock could be granted, False if not.
958 ****************************************************************************/
959
960 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
961 {
962         SMB_OFF_T offset;
963         SMB_OFF_T count;
964         BOOL ret = True;
965         size_t entry_num = 0;
966         size_t lock_count;
967         TALLOC_CTX *l_ctx = NULL;
968         struct lock_list *llist = NULL;
969         struct lock_list *ll = NULL;
970         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
971
972         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
973                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
974
975         /*
976          * If the requested lock won't fit in the POSIX range, we will
977          * pretend it was successful.
978          */
979
980         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
981                 return True;
982
983         /*
984          * Windows is very strange. It allows read locks to be overlayed
985          * (even over a write lock), but leaves the write lock in force until the first
986          * unlock. It also reference counts the locks. This means the following sequence :
987          *
988          * process1                                      process2
989          * ------------------------------------------------------------------------
990          * WRITE LOCK : start = 2, len = 10
991          *                                            READ LOCK: start =0, len = 10 - FAIL
992          * READ LOCK : start = 0, len = 14 
993          *                                            READ LOCK: start =0, len = 10 - FAIL
994          * UNLOCK : start = 2, len = 10
995          *                                            READ LOCK: start =0, len = 10 - OK
996          *
997          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
998          * would leave a single read lock over the 0-14 region. In order to
999          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
1000          * entries, one for each overlayed lock request. We are guarenteed by the brlock
1001          * semantics that if a write lock is added, then it will be first in the array.
1002          */
1003         
1004         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1005                 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1006                 return True; /* Not a fatal error. */
1007         }
1008
1009         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
1010                 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1011                 talloc_destroy(l_ctx);
1012                 return True; /* Not a fatal error. */
1013         }
1014
1015         /*
1016          * Create the initial list entry containing the
1017          * lock we want to add.
1018          */
1019
1020         ZERO_STRUCTP(ll);
1021         ll->start = offset;
1022         ll->size = count;
1023
1024         DLIST_ADD(llist, ll);
1025
1026         /*
1027          * The following call calculates if there are any
1028          * overlapping locks held by this process on
1029          * fd's open on the same file and splits this list
1030          * into a list of lock ranges that do not overlap with existing
1031          * POSIX locks.
1032          */
1033
1034         llist = posix_lock_list(l_ctx, llist, fsp);
1035
1036         /*
1037          * Now we have the list of ranges to lock it is safe to add the
1038          * entry into the POSIX lock tdb. We take note of the entry we
1039          * added here in case we have to remove it on POSIX lock fail.
1040          */
1041
1042         if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1043                 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1044                 talloc_destroy(l_ctx);
1045                 return False;
1046         }
1047
1048         /*
1049          * Add the POSIX locks on the list of ranges returned.
1050          * As the lock is supposed to be added atomically, we need to
1051          * back out all the locks if any one of these calls fail.
1052          */
1053
1054         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1055                 offset = ll->start;
1056                 count = ll->size;
1057
1058                 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1059                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1060
1061                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1062                         DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1063                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1064                         ret = False;
1065                         break;
1066                 }
1067         }
1068
1069         if (!ret) {
1070
1071                 /*
1072                  * Back out all the POSIX locks we have on fail.
1073                  */
1074
1075                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1076                         offset = ll->start;
1077                         count = ll->size;
1078
1079                         DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1080                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1081
1082                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1083                 }
1084
1085                 /*
1086                  * Remove the tdb entry for this lock.
1087                  */
1088
1089                 delete_posix_lock_entry_by_index(fsp,entry_num);
1090         }
1091
1092         talloc_destroy(l_ctx);
1093         return ret;
1094 }
1095
1096 /****************************************************************************
1097  POSIX function to release a lock. Returns True if the
1098  lock could be released, False if not.
1099 ****************************************************************************/
1100
1101 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1102 {
1103         SMB_OFF_T offset;
1104         SMB_OFF_T count;
1105         BOOL ret = True;
1106         TALLOC_CTX *ul_ctx = NULL;
1107         struct lock_list *ulist = NULL;
1108         struct lock_list *ul = NULL;
1109         struct posix_lock deleted_lock;
1110         int num_overlapped_entries;
1111
1112         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1113                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1114
1115         /*
1116          * If the requested lock won't fit in the POSIX range, we will
1117          * pretend it was successful.
1118          */
1119
1120         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1121                 return True;
1122
1123         /*
1124          * We treat this as one unlock request for POSIX accounting purposes even
1125          * if it may later be split into multiple smaller POSIX unlock ranges.
1126          * num_overlapped_entries is the number of existing locks that have any
1127          * overlap with this unlock request.
1128          */ 
1129
1130         num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1131
1132         if (num_overlapped_entries == -1) {
1133                 smb_panic("release_posix_lock: unable find entry to delete !\n");
1134         }
1135
1136         /*
1137          * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1138          * a POSIX write lock, then before doing the unlock we need to downgrade
1139          * the POSIX lock to a read lock. This allows any overlapping read locks
1140          * to be atomically maintained.
1141          */
1142
1143         if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1144                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1145                         DEBUG(0,("release_posix_lock: downgrade of lock failed with error %s !\n", strerror(errno) ));
1146                         return False;
1147                 }
1148         }
1149
1150         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1151                 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1152                 return True; /* Not a fatal error. */
1153         }
1154
1155         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1156                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1157                 talloc_destroy(ul_ctx);
1158                 return True; /* Not a fatal error. */
1159         }
1160
1161         /*
1162          * Create the initial list entry containing the
1163          * lock we want to remove.
1164          */
1165
1166         ZERO_STRUCTP(ul);
1167         ul->start = offset;
1168         ul->size = count;
1169
1170         DLIST_ADD(ulist, ul);
1171
1172         /*
1173          * The following call calculates if there are any
1174          * overlapping locks held by this process on
1175          * fd's open on the same file and creates a
1176          * list of unlock ranges that will allow
1177          * POSIX lock ranges to remain on the file whilst the
1178          * unlocks are performed.
1179          */
1180
1181         ulist = posix_lock_list(ul_ctx, ulist, fsp);
1182
1183         /*
1184          * Release the POSIX locks on the list of ranges returned.
1185          */
1186
1187         for(; ulist; ulist = ulist->next) {
1188                 offset = ulist->start;
1189                 count = ulist->size;
1190
1191                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1192                         (double)offset, (double)count ));
1193
1194                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1195                         ret = False;
1196         }
1197
1198         talloc_destroy(ul_ctx);
1199
1200         return ret;
1201 }
1202
1203 /****************************************************************************
1204  Remove all lock entries for a specific dev/inode pair from the tdb.
1205 ****************************************************************************/
1206
1207 static void delete_posix_lock_entries(files_struct *fsp)
1208 {
1209         TDB_DATA kbuf = locking_key_fsp(fsp);
1210
1211         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1212                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1213 }
1214
1215 /****************************************************************************
1216  Debug function.
1217 ****************************************************************************/
1218
1219 static void dump_entry(struct posix_lock *pl)
1220 {
1221         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1222                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1223 }
1224
1225 /****************************************************************************
1226  Remove any locks on this fd. Called from file_close().
1227 ****************************************************************************/
1228
1229 void posix_locking_close_file(files_struct *fsp)
1230 {
1231         struct posix_lock *entries = NULL;
1232         size_t count, i;
1233
1234         /*
1235          * Optimization for the common case where we are the only
1236          * opener of a file. If all fd entries are our own, we don't
1237          * need to explicitly release all the locks via the POSIX functions,
1238          * we can just remove all the entries in the tdb and allow the
1239          * close to remove the real locks.
1240          */
1241
1242         count = get_posix_lock_entries(fsp, &entries);
1243
1244         if (count == 0) {
1245                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1246                 return;
1247         }
1248
1249         for (i = 0; i < count; i++) {
1250                 if (entries[i].fd != fsp->fd )
1251                         break;
1252
1253                 dump_entry(&entries[i]);
1254         }
1255
1256         if (i == count) {
1257                 /* All locks are ours. */
1258                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n", 
1259                         fsp->fsp_name, (unsigned int)count ));
1260                 SAFE_FREE(entries);
1261                 delete_posix_lock_entries(fsp);
1262                 return;
1263         }
1264
1265         /*
1266          * Difficult case. We need to delete all our locks, whilst leaving
1267          * all other POSIX locks in place.
1268          */
1269
1270         for (i = 0; i < count; i++) {
1271                 struct posix_lock *pl = &entries[i];
1272                 if (pl->fd == fsp->fd)
1273                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1274         }
1275         SAFE_FREE(entries);
1276 }
1277
1278 /*******************************************************************
1279  Create the in-memory POSIX lock databases.
1280 ********************************************************************/
1281
1282 BOOL posix_locking_init(int read_only)
1283 {
1284         if (posix_lock_tdb && posix_pending_close_tdb)
1285                 return True;
1286         
1287         if (!posix_lock_tdb)
1288                 posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1289                                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1290         if (!posix_lock_tdb) {
1291                 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1292                 return False;
1293         }
1294         if (!posix_pending_close_tdb)
1295                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1296                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1297         if (!posix_pending_close_tdb) {
1298                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1299                 return False;
1300         }
1301
1302         return True;
1303 }
1304
1305 /*******************************************************************
1306  Delete the in-memory POSIX lock databases.
1307 ********************************************************************/
1308
1309 BOOL posix_locking_end(void)
1310 {
1311     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1312                 return False;
1313     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1314                 return False;
1315         return True;
1316 }