7bac1ffe37983439bf32b558bc82068237effd11
[samba.git] / source / locking / posix.c
1 /* 
2    Unix SMB/Netbios implementation.
3    Version 3.0
4    Locking functions
5    Copyright (C) Jeremy Allison 1992-2000
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
21    Revision History:
22
23    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 */
25
26 #include "includes.h"
27 extern int DEBUGLEVEL;
28 extern int global_smbpid;
29
30 /*
31  * The POSIX locking database handle.
32  */
33
34 static TDB_CONTEXT *posix_lock_tdb;
35
36 /*
37  * The pending close database handle.
38  */
39
40 static TDB_CONTEXT *posix_pending_close_tdb;
41
42 /*
43  * The data in POSIX lock records is an unsorted linear array of these
44  * records.  It is unnecessary to store the count as tdb provides the
45  * size of the record.
46  */
47
48 struct posix_lock {
49         int fd;
50         SMB_OFF_T start;
51         SMB_OFF_T size;
52         int lock_type;
53 };
54
55 /*
56  * The data in POSIX pending close records is an unsorted linear array of int
57  * records.  It is unnecessary to store the count as tdb provides the
58  * size of the record.
59  */
60
61 /* The key used in both the POSIX databases. */
62
63 struct posix_lock_key {
64         SMB_DEV_T device;
65         SMB_INO_T inode;
66 }; 
67
68 /*******************************************************************
69  Form a static locking key for a dev/inode pair.
70 ******************************************************************/
71
72 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
73 {
74         static struct posix_lock_key key;
75         TDB_DATA kbuf;
76
77         memset(&key, '\0', sizeof(key));
78         key.device = dev;
79         key.inode = inode;
80         kbuf.dptr = (char *)&key;
81         kbuf.dsize = sizeof(key);
82         return kbuf;
83 }
84
85 /*******************************************************************
86  Convenience function to get a key from an fsp.
87 ******************************************************************/
88
89 static TDB_DATA locking_key_fsp(files_struct *fsp)
90 {
91         return locking_key(fsp->dev, fsp->inode);
92 }
93
94 /****************************************************************************
95  Add an fd to the pending close tdb.
96 ****************************************************************************/
97
98 static BOOL add_fd_to_close_entry(files_struct *fsp)
99 {
100         TDB_DATA kbuf = locking_key_fsp(fsp);
101         TDB_DATA dbuf;
102
103         dbuf.dptr = NULL;
104
105         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
106
107         dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
108         if (!dbuf.dptr) {
109                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
110                 return False;
111         }
112         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
113         dbuf.dsize += sizeof(int);
114
115         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
116                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
117         }
118
119         free(dbuf.dptr);
120         return True;
121 }
122
123 /****************************************************************************
124  Remove all fd entries for a specific dev/inode pair from the tdb.
125 ****************************************************************************/
126
127 static void delete_close_entries(files_struct *fsp)
128 {
129         TDB_DATA kbuf = locking_key_fsp(fsp);
130
131         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
132                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
133 }
134
135 /****************************************************************************
136  Get the array of POSIX pending close records for an open fsp. Caller must
137  free. Returns number of entries.
138 ****************************************************************************/
139
140 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
141 {
142         TDB_DATA kbuf = locking_key_fsp(fsp);
143         TDB_DATA dbuf;
144         size_t count = 0;
145
146         *entries = NULL;
147         dbuf.dptr = NULL;
148
149         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
150
151     if (!dbuf.dptr) {
152                 return 0;
153         }
154
155         *entries = (int *)dbuf.dptr;
156         count = (size_t)(dbuf.dsize / sizeof(int));
157
158         return count;
159 }
160
161 /****************************************************************************
162  Get the array of POSIX locks for an fsp. Caller must free. Returns
163  number of entries.
164 ****************************************************************************/
165
166 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
167 {
168         TDB_DATA kbuf = locking_key_fsp(fsp);
169         TDB_DATA dbuf;
170         size_t count = 0;
171
172         *entries = NULL;
173
174         dbuf.dptr = NULL;
175
176         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
177
178     if (!dbuf.dptr) {
179                 return 0;
180         }
181
182         *entries = (struct posix_lock *)dbuf.dptr;
183         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
184
185         return count;
186 }
187
188 /****************************************************************************
189  Deal with pending closes needed by POSIX locking support.
190  Note that posix_locking_close_file() is expected to have been called
191  to delete all locks on this fsp before this function is called.
192 ****************************************************************************/
193
194 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
195 {
196         int saved_errno = 0;
197         int ret;
198         size_t count, i;
199         struct posix_lock *entries = NULL;
200         int *fd_array = NULL;
201         BOOL locks_on_other_fds = False;
202
203         if (!lp_posix_locking(SNUM(conn))) {
204                 /*
205                  * No POSIX to worry about, just close.
206                  */
207                 ret = conn->vfs_ops.close(fsp,fsp->fd);
208                 fsp->fd = -1;
209                 return ret;
210         }
211
212         /*
213          * Get the number of outstanding POSIX locks on this dev/inode pair.
214          */
215
216         count = get_posix_lock_entries(fsp, &entries);
217
218         /*
219          * Check if there are any outstanding locks belonging to
220          * other fd's. This should never be the case if posix_locking_close_file()
221          * has been called first, but it never hurts to be *sure*.
222          */
223
224         for (i = 0; i < count; i++) {
225                 if (entries[i].fd != fsp->fd) {
226                         locks_on_other_fds = True;
227                         break;
228                 }
229         }
230
231         if (locks_on_other_fds) {
232
233                 /*
234                  * There are outstanding locks on this dev/inode pair on other fds.
235                  * Add our fd to the pending close tdb and set fsp->fd to -1.
236                  */
237
238                 if (!add_fd_to_close_entry(fsp)) {
239                         free((char *)entries);
240                         return False;
241                 }
242
243                 free((char *)entries);
244                 fsp->fd = -1;
245                 return 0;
246         }
247
248         if(entries)
249                 free((char *)entries);
250
251         /*
252          * No outstanding POSIX locks. Get the pending close fd's
253          * from the tdb and close them all.
254          */
255
256         count = get_posix_pending_close_entries(fsp, &fd_array);
257
258         if (count) {
259                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
260
261                 for(i = 0; i < count; i++) {
262                         if (conn->vfs_ops.close(fsp,fd_array[i]) == -1) {
263                                 saved_errno = errno;
264                         }
265                 }
266
267                 /*
268                  * Delete all fd's stored in the tdb
269                  * for this dev/inode pair.
270                  */
271
272                 delete_close_entries(fsp);
273         }
274
275         if (fd_array)
276                 free((char *)fd_array);
277
278         /*
279          * Finally close the fd associated with this fsp.
280          */
281
282         ret = conn->vfs_ops.close(fsp,fsp->fd);
283
284         if (saved_errno != 0) {
285         errno = saved_errno;
286                 ret = -1;
287     } 
288
289         fsp->fd = -1;
290
291         return ret;
292 }
293
/****************************************************************************
 Debugging aid :-).
 Returns "READ" for F_RDLCK and "WRITE" for any other lock type value.
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type != F_RDLCK) {
		return "WRITE";
	}

	return "READ";
}
302
303 /****************************************************************************
304  Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
305  then the POSIX fcntl lock fails.
306 ****************************************************************************/
307
308 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
309 {
310         TDB_DATA kbuf = locking_key_fsp(fsp);
311         TDB_DATA dbuf;
312         struct posix_lock *locks;
313         size_t count;
314
315         dbuf.dptr = NULL;
316         
317         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
318
319         if (!dbuf.dptr) {
320                 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
321                 goto fail;
322         }
323
324         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
325         locks = (struct posix_lock *)dbuf.dptr;
326
327         if (count == 1) {
328                 tdb_delete(posix_lock_tdb, kbuf);
329         } else {
330                 if (entry < count-1) {
331                         memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
332                 }
333                 dbuf.dsize -= sizeof(*locks);
334                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
335         }
336
337         free(dbuf.dptr);
338
339         return True;
340
341  fail:
342     if (dbuf.dptr)
343                 free(dbuf.dptr);
344     return False;
345 }
346
347 /****************************************************************************
348  Add an entry into the POSIX locking tdb. We return the index number of the
349  added lock (used in case we need to delete *exactly* this entry). Returns
350  False on fail, True on success.
351 ****************************************************************************/
352
353 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
354 {
355         TDB_DATA kbuf = locking_key_fsp(fsp);
356         TDB_DATA dbuf;
357         struct posix_lock pl;
358
359         dbuf.dptr = NULL;
360
361         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
362
363         *pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
364
365         /*
366          * Add new record.
367          */
368
369         pl.fd = fsp->fd;
370         pl.start = start;
371         pl.size = size;
372         pl.lock_type = lock_type;
373
374         dbuf.dptr = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
375         if (!dbuf.dptr) {
376                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
377                 goto fail;
378         }
379
380         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
381         dbuf.dsize += sizeof(pl);
382
383         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
384                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
385                 goto fail;
386         }
387
388     free(dbuf.dptr);
389
390         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
391                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
392                         (double)fsp->dev, (double)fsp->inode ));
393
394     return True;
395
396  fail:
397     if (dbuf.dptr)
398                 free(dbuf.dptr);
399     return False;
400 }
401
402 /****************************************************************************
403  Calculate if locks have any overlap at all.
404 ****************************************************************************/
405
406 static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
407 {
408         if (start1 >= start2 && start1 <= start2 + size2)
409                 return True;
410
411         if (start1 < start2 && start1 + size1 > start2);
412                 return True;
413
414         return False;
415 }
416
417 /****************************************************************************
418  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
419  deleted and the number of records that are overlapped by this one, or -1 on error.
420 ****************************************************************************/
421
422 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
423 {
424         TDB_DATA kbuf = locking_key_fsp(fsp);
425         TDB_DATA dbuf;
426         struct posix_lock *locks;
427         size_t i, count;
428         BOOL found = False;
429         int num_overlapping_records = 0;
430
431         dbuf.dptr = NULL;
432         
433         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
434
435         if (!dbuf.dptr) {
436                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
437                 goto fail;
438         }
439
440         /* There are existing locks - find a match. */
441         locks = (struct posix_lock *)dbuf.dptr;
442         count = (size_t)(dbuf.dsize / sizeof(*locks));
443
444         /*
445          * Search for and delete the first record that matches the
446          * unlock criteria.
447          */
448
449         for (i=0; i<count; i++) { 
450                 struct posix_lock *entry = &locks[i];
451
452                 if (entry->fd == fsp->fd &&
453                         entry->start == start &&
454                         entry->size == size) {
455
456                         /* Make a copy if requested. */
457                         if (pl)
458                                 *pl = *entry;
459
460                         /* Found it - delete it. */
461                         if (count == 1) {
462                                 tdb_delete(posix_lock_tdb, kbuf);
463                         } else {
464                                 if (i < count-1) {
465                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
466                                 }
467                                 dbuf.dsize -= sizeof(*locks);
468                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
469                         }
470                         count--;
471                         found = True;
472                         break;
473                 }
474         }
475
476         if (!found)
477                 goto fail;
478
479         /*
480          * Count the number of entries that are
481          * overlapped by this unlock request.
482          */
483
484         for (i = 0; i < count; i++) {
485                 struct posix_lock *entry = &locks[i];
486
487                 if (fsp->fd == entry->fd &&
488                         does_lock_overlap( start, size, entry->start, entry->size))
489                                 num_overlapping_records++;
490         }
491
492         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
493                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
494                                 (unsigned int)num_overlapping_records ));
495
496     if (dbuf.dptr)
497                 free(dbuf.dptr);
498
499         return num_overlapping_records;
500
501  fail:
502     if (dbuf.dptr)
503                 free(dbuf.dptr);
504     return -1;
505 }
506
507 /****************************************************************************
508  Utility function to map a lock type correctly depending on the open
509  mode of a file.
510 ****************************************************************************/
511
512 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
513 {
514         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
515                 /*
516                  * Many UNIX's cannot get a write lock on a file opened read-only.
517                  * Win32 locking semantics allow this.
518                  * Do the best we can and attempt a read-only lock.
519                  */
520                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
521                 return F_RDLCK;
522         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
523                 /*
524                  * Ditto for read locks on write only files.
525                  */
526                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
527                 return F_WRLCK;
528         }
529
530   /*
531    * This return should be the most normal, as we attempt
532    * to always open files read/write.
533    */
534
535   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
536 }
537
538 /****************************************************************************
539  Check to see if the given unsigned lock range is within the possible POSIX
540  range. Modifies the given args to be in range if possible, just returns
541  False if not.
542 ****************************************************************************/
543
544 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
545                                                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
546 {
547         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
548         SMB_OFF_T count = (SMB_OFF_T)u_count;
549
550         /*
551          * For the type of system we are, attempt to
552          * find the maximum positive lock offset as an SMB_OFF_T.
553          */
554
555 #if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
556
557         /*
558          * In this case SMB_OFF_T is 64 bits,
559          * and the underlying system can handle 64 bit signed locks.
560          */
561
562     SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
563     SMB_OFF_T mask = (mask2<<1);
564     SMB_OFF_T max_positive_lock_offset = ~mask;
565
566 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
567
568         /*
569          * In this case either SMB_OFF_T is 32 bits,
570          * or the underlying system cannot handle 64 bit signed locks.
571          * All offsets & counts must be 2^31 or less.
572          */
573
574     SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
575
576 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
577
578         /*
579          * If the given offset was > max_positive_lock_offset then we cannot map this at all
580          * ignore this lock.
581          */
582
583         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
584                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
585                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
586                 return False;
587         }
588
589         /*
590          * We must truncate the offset and count to less than max_positive_lock_offset.
591          */
592
593         offset &= max_positive_lock_offset;
594         count &= max_positive_lock_offset;
595
596
597         /*
598          * Deal with a very common case of count of all ones.
599          * (lock entire file).
600          */
601
602         if(count == (SMB_OFF_T)-1)
603                 count = max_positive_lock_offset;
604
605         /*
606          * Truncate count to end at max lock offset.
607          */
608
609         if (offset + count < 0 || offset + count > max_positive_lock_offset)
610                 count = max_positive_lock_offset - offset;
611
612         /*
613          * If we ate all the count, ignore this lock.
614          */
615
616         if (count == 0) {
617                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
618                                 (double)u_offset, (double)u_count ));
619                 return False;
620         }
621
622         /*
623          * The mapping was successful.
624          */
625
626         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
627                         (double)offset, (double)count ));
628
629         *offset_out = offset;
630         *count_out = count;
631         
632         return True;
633 }
634
#if defined(LARGE_SMB_OFF_T)
/****************************************************************************
 Pathetically try and map a 64 bit lock offset into 31 bits. I hate Windows :-).

 'high' and 'low' are the upper and lower 32 bits of the offset. The
 significant bits of 'high' are moved to the top of a 31 bit value and
 or'ed with 'low'. Returns 0 if 'low' already uses any of the bits that
 'high' needs, or if 'high' uses all 32 bits.
****************************************************************************/

static uint32 map_lock_offset(uint32 high, uint32 low)
{
	unsigned int i;
	uint32 mask = 0;
	uint32 highcopy = high;

	/*
	 * Try and find out how many significant bits there are in high.
	 */

	for(i = 0; highcopy; i++)
		highcopy >>= 1;

	/*
	 * We use 31 bits not 32 here as POSIX
	 * lock offsets may not be negative.
	 */

	if (i > 31)
		return 0; /* Fail - high cannot fit and (31 - i) would wrap. */

	/* Shift an unsigned all-ones value; shifting a negative int is undefined. */
	mask = (~(uint32)0) << (31 - i);

	if(low & mask)
		return 0; /* Fail. */

	high <<= (31 - i);

	return (high|low);
}
#endif
668
669 /****************************************************************************
670  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
671  broken NFS implementations.
672 ****************************************************************************/
673
674 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
675 {
676         int ret;
677         struct connection_struct *conn = fsp->conn;
678
679 #if defined(LARGE_SMB_OFF_T)
680         /*
681          * In the 64 bit locking case we store the original
682          * values in case we have to map to a 32 bit lock on
683          * a filesystem that doesn't support 64 bit locks.
684          */
685         SMB_OFF_T orig_offset = offset;
686         SMB_OFF_T orig_count = count;
687 #endif /* LARGE_SMB_OFF_T */
688
689         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
690
691         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
692
693         if (!ret && (errno == EFBIG)) {
694                 if( DEBUGLVL( 0 )) {
695                         dbgtext("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n", (double)offset,(double)count);
696                         dbgtext("a 'file too large' error. This can happen when using 64 bit lock offsets\n");
697                         dbgtext("on 32 bit NFS mounted file systems. Retrying with 32 bit truncated length.\n");
698                 }
699                 /* 32 bit NFS file system, retry with smaller offset */
700                 errno = 0;
701                 count &= 0x7fffffff;
702                 ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
703         }
704
705         /* A lock query - just return. */
706         if (op == SMB_F_GETLK)
707                 return ret;
708
709         /* A lock set or unset. */
710         if (!ret) {
711                 DEBUG(3,("posix_fcntl_lock: lock failed at offset %.0f count %.0f op %d type %d (%s)\n",
712                                 (double)offset,(double)count,op,type,strerror(errno)));
713
714                 /* Perhaps it doesn't support this sort of locking ? */
715                 if (errno == EINVAL) {
716 #if defined(LARGE_SMB_OFF_T)
717                         {
718                                 /*
719                                  * Ok - if we get here then we have a 64 bit lock request
720                                  * that has returned EINVAL. Try and map to 31 bits for offset
721                                  * and length and try again. This may happen if a filesystem
722                                  * doesn't support 64 bit offsets (efs/ufs) although the underlying
723                                  * OS does.
724                                  */
725                                 uint32 off_low = (orig_offset & 0xFFFFFFFF);
726                                 uint32 off_high = ((orig_offset >> 32) & 0xFFFFFFFF);
727
728                                 count = (orig_count & 0x7FFFFFFF);
729                                 offset = (SMB_OFF_T)map_lock_offset(off_high, off_low);
730                                 ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
731                                 if (!ret) {
732                                         if (errno == EINVAL) {
733                                                 DEBUG(3,("posix_fcntl_lock: locking not supported? returning True\n"));
734                                                 return(True);
735                                         }
736                                         return False;
737                                 }
738                                 DEBUG(3,("posix_fcntl_lock: 64 -> 32 bit modified lock call successful\n"));
739                                 return True;
740                         }
741 #else /* LARGE_SMB_OFF_T */
742                         DEBUG(3,("locking not supported? returning True\n"));
743                         return(True);
744 #endif /* LARGE_SMB_OFF_T */
745                 }
746
747                 return(False);
748         }
749
750         DEBUG(8,("posix_fcntl_lock: Lock call successful\n"));
751
752         return(True);
753 }
754
755 /****************************************************************************
756  POSIX function to see if a file region is locked. Returns True if the
757  region is locked, False otherwise.
758 ****************************************************************************/
759
760 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
761 {
762         SMB_OFF_T offset;
763         SMB_OFF_T count;
764         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
765
766         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
767                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
768
769         /*
770          * If the requested lock won't fit in the POSIX range, we will
771          * never set it, so presume it is not locked.
772          */
773
774         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
775                 return False;
776
777         /*
778          * Note that most UNIX's can *test* for a write lock on
779          * a read-only fd, just not *set* a write lock on a read-only
780          * fd. So we don't need to use map_lock_type here.
781          */ 
782
783         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
784 }
785
786 /*
787  * Structure used when splitting a lock range
788  * into a POSIX lock range. Doubly linked list.
789  */
790
791 struct lock_list {
792     struct lock_list *next;
793     struct lock_list *prev;
794     SMB_OFF_T start;
795     SMB_OFF_T size;
796 };
797
798 /****************************************************************************
799  Create a list of lock ranges that don't overlap a given range. Used in calculating
800  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
801  understand it :-).
802 ****************************************************************************/
803
804 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
805 {
806         TDB_DATA kbuf = locking_key_fsp(fsp);
807         TDB_DATA dbuf;
808         struct posix_lock *locks;
809         size_t num_locks, i;
810
811         dbuf.dptr = NULL;
812
813         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
814
815         if (!dbuf.dptr)
816                 return lhead;
817         
818         locks = (struct posix_lock *)dbuf.dptr;
819         num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
820
821         /*
822          * Check the current lock list on this dev/inode pair.
823          * Quit if the list is deleted.
824          */
825
826         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
827                 (double)lhead->start, (double)lhead->size ));
828
829         for (i=0; i<num_locks && lhead; i++) {
830
831                 struct posix_lock *lock = &locks[i];
832                 struct lock_list *l_curr;
833
834                 /*
835                  * Walk the lock list, checking for overlaps. Note that
836                  * the lock list can expand within this loop if the current
837                  * range being examined needs to be split.
838                  */
839
840                 for (l_curr = lhead; l_curr;) {
841
842                         DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
843                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
844
845                         if ( (l_curr->start >= (lock->start + lock->size)) ||
846                                  (lock->start >= (l_curr->start + l_curr->size))) {
847
848                                 /* No overlap with this lock - leave this range alone. */
849 /*********************************************
850                                              +---------+
851                                              | l_curr  |
852                                              +---------+
853                                 +-------+
854                                 | lock  |
855                                 +-------+
856 OR....
857              +---------+
858              |  l_curr |
859              +---------+
860 **********************************************/
861
862                                 DEBUG(10,("no overlap case.\n" ));
863
864                                 l_curr = l_curr->next;
865
866                         } else if ( (l_curr->start >= lock->start) &&
867                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
868
869                                 /*
870                                  * This unlock is completely overlapped by this existing lock range
871                                  * and thus should have no effect (not be unlocked). Delete it from the list.
872                                  */
873 /*********************************************
874                 +---------+
875                 |  l_curr |
876                 +---------+
877         +---------------------------+
878         |       lock                |
879         +---------------------------+
880 **********************************************/
881                                 /* Save the next pointer */
882                                 struct lock_list *ul_next = l_curr->next;
883
884                                 DEBUG(10,("delete case.\n" ));
885
886                                 DLIST_REMOVE(lhead, l_curr);
887                                 if(lhead == NULL)
888                                         break; /* No more list... */
889
890                                 l_curr = ul_next;
891                                 
892                         } else if ( (l_curr->start >= lock->start) &&
893                                                 (l_curr->start < lock->start + lock->size) &&
894                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
895
896                                 /*
897                                  * This unlock overlaps the existing lock range at the high end.
898                                  * Truncate by moving start to existing range end and reducing size.
899                                  */
900 /*********************************************
901                 +---------------+
902                 |  l_curr       |
903                 +---------------+
904         +---------------+
905         |    lock       |
906         +---------------+
907 BECOMES....
908                         +-------+
909                         | l_curr|
910                         +-------+
911 **********************************************/
912
913                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
914                                 l_curr->start = lock->start + lock->size;
915
916                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
917                                                                 (double)l_curr->start, (double)l_curr->size ));
918
919                                 l_curr = l_curr->next;
920
921                         } else if ( (l_curr->start < lock->start) &&
922                                                 (l_curr->start + l_curr->size > lock->start) &&
923                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
924
925                                 /*
926                                  * This unlock overlaps the existing lock range at the low end.
927                                  * Truncate by reducing size.
928                                  */
929 /*********************************************
930    +---------------+
931    |  l_curr       |
932    +---------------+
933            +---------------+
934            |    lock       |
935            +---------------+
936 BECOMES....
937    +-------+
938    | l_curr|
939    +-------+
940 **********************************************/
941
942                                 l_curr->size = lock->start - l_curr->start;
943
944                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
945                                                                 (double)l_curr->start, (double)l_curr->size ));
946
947                                 l_curr = l_curr->next;
948                 
949                         } else if ( (l_curr->start < lock->start) &&
950                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
951                                 /*
952                                  * Worst case scenario. Unlock request completely overlaps an existing
953                                  * lock range. Split the request into two, push the new (upper) request
954                                  * into the dlink list, and continue with the entry after ul_new (as we
955                                  * know that ul_new will not overlap with this lock).
956                                  */
957 /*********************************************
958         +---------------------------+
959         |        l_curr             |
960         +---------------------------+
961                 +---------+
962                 | lock    |
963                 +---------+
964 BECOMES.....
965         +-------+         +---------+
966         | l_curr|         | l_new   |
967         +-------+         +---------+
968 **********************************************/
969                                 struct lock_list *l_new = (struct lock_list *)talloc(ctx,
970                                                                                                         sizeof(struct lock_list));
971
972                                 if(l_new == NULL) {
973                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
974                                         return NULL; /* The talloc_destroy takes care of cleanup. */
975                                 }
976
977                                 ZERO_STRUCTP(l_new);
978                                 l_new->start = lock->start + lock->size;
979                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
980
981                                 /* Truncate the l_curr. */
982                                 l_curr->size = lock->start - l_curr->start;
983
984                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
985 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
986                                                                 (double)l_new->start, (double)l_new->size ));
987
988                                 /*
989                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
990                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
991                                  */
992
993                                 l_new->prev = l_curr;
994                                 l_new->next = l_curr->next;
995                                 l_curr->next = l_new;
996
997                                 /* And move after the link we added. */
998                                 l_curr = l_new->next;
999
1000                         } else {
1001
1002                                 /*
1003                                  * This logic case should never happen. Ensure this is the
1004                                  * case by forcing an abort.... Remove in production.
1005                                  */
1006                                 pstring msg;
1007
1008                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
1009 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
1010
1011                                 smb_panic(msg);
1012                         }
1013                 } /* end for ( l_curr = lhead; l_curr;) */
1014         } /* end for (i=0; i<num_locks && ul_head; i++) */
1015
1016         if (dbuf.dptr)
1017                 free(dbuf.dptr);
1018         
1019         return lhead;
1020 }
1021
1022 /****************************************************************************
1023  POSIX function to acquire a lock. Returns True if the
1024  lock could be granted, False if not.
1025 ****************************************************************************/
1026
1027 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
1028 {
1029         SMB_OFF_T offset;
1030         SMB_OFF_T count;
1031         BOOL ret = True;
1032         size_t entry_num = 0;
1033         size_t lock_count;
1034         TALLOC_CTX *l_ctx = NULL;
1035         struct lock_list *llist = NULL;
1036         struct lock_list *ll = NULL;
1037         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1038
1039         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
1040                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
1041
1042         /*
1043          * If the requested lock won't fit in the POSIX range, we will
1044          * pretend it was successful.
1045          */
1046
1047         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1048                 return True;
1049
1050         /*
1051          * Windows is very strange. It allows read locks to be overlayed
1052          * (even over a write lock), but leaves the write lock in force until the first
1053          * unlock. It also reference counts the locks. This means the following sequence :
1054          *
1055          * process1                                      process2
1056          * ------------------------------------------------------------------------
1057          * WRITE LOCK : start = 2, len = 10
1058          *                                            READ LOCK: start =0, len = 10 - FAIL
1059          * READ LOCK : start = 0, len = 14 
1060          *                                            READ LOCK: start =0, len = 10 - FAIL
1061          * UNLOCK : start = 2, len = 10
1062          *                                            READ LOCK: start =0, len = 10 - OK
1063          *
1064          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1065          * would leave a single read lock over the 0-14 region. In order to
1066          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
1067          * entries, one for each overlayed lock request. We are guarenteed by the brlock
1068          * semantics that if a write lock is added, then it will be first in the array.
1069          */
1070         
1071         if ((l_ctx = talloc_init()) == NULL) {
1072                 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1073                 return True; /* Not a fatal error. */
1074         }
1075
1076         if ((ll = (struct lock_list *)talloc(l_ctx, sizeof(struct lock_list))) == NULL) {
1077                 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1078                 talloc_destroy(l_ctx);
1079                 return True; /* Not a fatal error. */
1080         }
1081
1082         /*
1083          * Create the initial list entry containing the
1084          * lock we want to add.
1085          */
1086
1087         ZERO_STRUCTP(ll);
1088         ll->start = offset;
1089         ll->size = count;
1090
1091         DLIST_ADD(llist, ll);
1092
1093         /*
1094          * The following call calculates if there are any
1095          * overlapping locks held by this process on
1096          * fd's open on the same file and splits this list
1097          * into a list of lock ranges that do not overlap with existing
1098          * POSIX locks.
1099          */
1100
1101         llist = posix_lock_list(l_ctx, llist, fsp);
1102
1103         /*
1104          * Now we have the list of ranges to lock it is safe to add the
1105          * entry into the POSIX lock tdb. We take note of the entry we
1106          * added here in case we have to remove it on POSIX lock fail.
1107          */
1108
1109         if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1110                 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1111                 talloc_destroy(l_ctx);
1112                 return False;
1113         }
1114
1115         /*
1116          * Add the POSIX locks on the list of ranges returned.
1117          * As the lock is supposed to be added atomically, we need to
1118          * back out all the locks if any one of these calls fail.
1119          */
1120
1121         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1122                 offset = ll->start;
1123                 count = ll->size;
1124
1125                 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1126                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1127
1128                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1129                         DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f\n",
1130                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1131                         ret = False;
1132                         break;
1133                 }
1134         }
1135
1136         if (!ret) {
1137
1138                 /*
1139                  * Back out all the POSIX locks we have on fail.
1140                  */
1141
1142                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1143                         offset = ll->start;
1144                         count = ll->size;
1145
1146                         DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1147                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1148
1149                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1150                 }
1151
1152                 /*
1153                  * Remove the tdb entry for this lock.
1154                  */
1155
1156                 delete_posix_lock_entry_by_index(fsp,entry_num);
1157         }
1158
1159         talloc_destroy(l_ctx);
1160         return ret;
1161 }
1162
1163 /****************************************************************************
1164  POSIX function to release a lock. Returns True if the
1165  lock could be released, False if not.
1166 ****************************************************************************/
1167
1168 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1169 {
1170         SMB_OFF_T offset;
1171         SMB_OFF_T count;
1172         BOOL ret = True;
1173         TALLOC_CTX *ul_ctx = NULL;
1174         struct lock_list *ulist = NULL;
1175         struct lock_list *ul = NULL;
1176         struct posix_lock deleted_lock;
1177         int num_overlapped_entries;
1178
1179         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1180                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1181
1182         /*
1183          * If the requested lock won't fit in the POSIX range, we will
1184          * pretend it was successful.
1185          */
1186
1187         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1188                 return True;
1189
1190         /*
1191          * We treat this as one unlock request for POSIX accounting purposes even
1192          * if it may later be split into multiple smaller POSIX unlock ranges.
1193          * num_overlapped_entries is the number of existing locks that have any
1194          * overlap with this unlock request.
1195          */ 
1196
1197         num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1198
1199         if (num_overlapped_entries == -1) {
1200         smb_panic("release_posix_lock: unable find entry to delete !\n");
1201         }
1202
1203         /*
1204          * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1205          * a POSIX write lock, then before doing the unlock we need to downgrade
1206          * the POSIX lock to a read lock. This allows any overlapping read locks
1207          * to be atomically maintained.
1208          */
1209
1210         if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1211                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1212                         DEBUG(0,("release_posix_lock: downgrade of lock failed !\n"));
1213                         return False;
1214                 }
1215         }
1216
1217         if ((ul_ctx = talloc_init()) == NULL) {
1218                 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1219                 return True; /* Not a fatal error. */
1220         }
1221
1222         if ((ul = (struct lock_list *)talloc(ul_ctx, sizeof(struct lock_list))) == NULL) {
1223                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1224                 talloc_destroy(ul_ctx);
1225                 return True; /* Not a fatal error. */
1226         }
1227
1228         /*
1229          * Create the initial list entry containing the
1230          * lock we want to remove.
1231          */
1232
1233         ZERO_STRUCTP(ul);
1234         ul->start = offset;
1235         ul->size = count;
1236
1237         DLIST_ADD(ulist, ul);
1238
1239         /*
1240          * The following call calculates if there are any
1241          * overlapping locks held by this process on
1242          * fd's open on the same file and creates a
1243          * list of unlock ranges that will allow
1244          * POSIX lock ranges to remain on the file whilst the
1245          * unlocks are performed.
1246          */
1247
1248         ulist = posix_lock_list(ul_ctx, ulist, fsp);
1249
1250         /*
1251          * Release the POSIX locks on the list of ranges returned.
1252          */
1253
1254         for(; ulist; ulist = ulist->next) {
1255                 offset = ulist->start;
1256                 count = ulist->size;
1257
1258                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1259                         (double)offset, (double)count ));
1260
1261                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1262                         ret = False;
1263         }
1264
1265         talloc_destroy(ul_ctx);
1266
1267         return ret;
1268 }
1269
1270 /****************************************************************************
1271  Remove all lock entries for a specific dev/inode pair from the tdb.
1272 ****************************************************************************/
1273
1274 static void delete_posix_lock_entries(files_struct *fsp)
1275 {
1276         TDB_DATA kbuf = locking_key_fsp(fsp);
1277
1278         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1279                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1280 }
1281
1282 /****************************************************************************
1283  Debug function.
1284 ****************************************************************************/
1285
1286 static void dump_entry(struct posix_lock *pl)
1287 {
1288         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1289                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1290 }
1291
1292 /****************************************************************************
1293  Remove any locks on this fd. Called from file_close().
1294 ****************************************************************************/
1295
1296 void posix_locking_close_file(files_struct *fsp)
1297 {
1298         struct posix_lock *entries = NULL;
1299         size_t count, i;
1300
1301         /*
1302          * Optimization for the common case where we are the only
1303          * opener of a file. If all fd entries are our own, we don't
1304          * need to explicitly release all the locks via the POSIX functions,
1305          * we can just remove all the entries in the tdb and allow the
1306          * close to remove the real locks.
1307          */
1308
1309         count = get_posix_lock_entries(fsp, &entries);
1310
1311         if (count == 0) {
1312                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1313                 return;
1314         }
1315
1316         for (i = 0; i < count; i++) {
1317                 if (entries[i].fd != fsp->fd )
1318                         break;
1319
1320                 dump_entry(&entries[i]);
1321         }
1322
1323         if (i == count) {
1324                 /* All locks are ours. */
1325                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n", 
1326                         fsp->fsp_name, (unsigned int)count ));
1327                 free((char *)entries);
1328                 delete_posix_lock_entries(fsp);
1329                 return;
1330         }
1331
1332         /*
1333          * Difficult case. We need to delete all our locks, whilst leaving
1334          * all other POSIX locks in place.
1335          */
1336
1337         for (i = 0; i < count; i++) {
1338                 struct posix_lock *pl = &entries[i];
1339                 if (pl->fd == fsp->fd)
1340                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1341         }
1342         free((char *)entries);
1343 }
1344
1345 /*******************************************************************
1346  Create the in-memory POSIX lock databases.
1347 ********************************************************************/
1348
1349 BOOL posix_locking_init(int read_only)
1350 {
1351         if (posix_lock_tdb && posix_pending_close_tdb)
1352                 return True;
1353         
1354         if (!posix_lock_tdb)
1355                 posix_lock_tdb = tdb_open(NULL, 0, TDB_INTERNAL,
1356                                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1357         if (!posix_lock_tdb) {
1358                 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1359                 return False;
1360         }
1361         if (!posix_pending_close_tdb)
1362                 posix_pending_close_tdb = tdb_open(NULL, 0, TDB_INTERNAL,
1363                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1364         if (!posix_pending_close_tdb) {
1365                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1366                 return False;
1367         }
1368
1369         return True;
1370 }
1371
1372 /*******************************************************************
1373  Delete the in-memory POSIX lock databases.
1374 ********************************************************************/
1375
1376 BOOL posix_locking_end(void)
1377 {
1378     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1379                 return False;
1380     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1381                 return False;
1382         return True;
1383 }