This is a big global fix for the `ptr = Realloc(ptr, size)` bug: assigning the result straight back to `ptr` loses the original buffer when Realloc fails.
[kamenim/samba-autobuild/.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/Netbios implementation.
3    Version 3.0
4    Locking functions
5    Copyright (C) Jeremy Allison 1992-2000
6    
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2 of the License, or
10    (at your option) any later version.
11    
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15    GNU General Public License for more details.
16    
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20
21    Revision History:
22
23    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
24 */
25
26 #include "includes.h"
27 extern int DEBUGLEVEL;
28
29 /*
30  * The POSIX locking database handle.
31  */
32
33 static TDB_CONTEXT *posix_lock_tdb;
34
35 /*
36  * The pending close database handle.
37  */
38
39 static TDB_CONTEXT *posix_pending_close_tdb;
40
41 /*
42  * The data in POSIX lock records is an unsorted linear array of these
43  * records.  It is unnecessary to store the count as tdb provides the
44  * size of the record.
45  */
46
47 struct posix_lock {
48         int fd;
49         SMB_OFF_T start;
50         SMB_OFF_T size;
51         int lock_type;
52 };
53
54 /*
55  * The data in POSIX pending close records is an unsorted linear array of int
56  * records.  It is unnecessary to store the count as tdb provides the
57  * size of the record.
58  */
59
60 /* The key used in both the POSIX databases. */
61
62 struct posix_lock_key {
63         SMB_DEV_T device;
64         SMB_INO_T inode;
65 }; 
66
67 /*******************************************************************
68  Form a static locking key for a dev/inode pair.
69 ******************************************************************/
70
71 static TDB_DATA locking_key(SMB_DEV_T dev, SMB_INO_T inode)
72 {
73         static struct posix_lock_key key;
74         TDB_DATA kbuf;
75
76         memset(&key, '\0', sizeof(key));
77         key.device = dev;
78         key.inode = inode;
79         kbuf.dptr = (char *)&key;
80         kbuf.dsize = sizeof(key);
81         return kbuf;
82 }
83
84 /*******************************************************************
85  Convenience function to get a key from an fsp.
86 ******************************************************************/
87
88 static TDB_DATA locking_key_fsp(files_struct *fsp)
89 {
90         return locking_key(fsp->dev, fsp->inode);
91 }
92
93 /****************************************************************************
94  Add an fd to the pending close tdb.
95 ****************************************************************************/
96
97 static BOOL add_fd_to_close_entry(files_struct *fsp)
98 {
99         TDB_DATA kbuf = locking_key_fsp(fsp);
100         TDB_DATA dbuf;
101         char *tp;
102
103         dbuf.dptr = NULL;
104
105         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
106
107         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(int));
108         if (!tp) {
109                 DEBUG(0,("add_fd_to_close_entry: Realloc fail !\n"));
110                 if (dbuf.dptr) free(dbuf.dptr);
111                 return False;
112         }
113         else dbuf.dptr = tp;
114         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fd, sizeof(int));
115         dbuf.dsize += sizeof(int);
116
117         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
118                 DEBUG(0,("add_fd_to_close_entry: tdb_store fail !\n"));
119         }
120
121         free(dbuf.dptr);
122         return True;
123 }
124
125 /****************************************************************************
126  Remove all fd entries for a specific dev/inode pair from the tdb.
127 ****************************************************************************/
128
129 static void delete_close_entries(files_struct *fsp)
130 {
131         TDB_DATA kbuf = locking_key_fsp(fsp);
132
133         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1)
134                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
135 }
136
137 /****************************************************************************
138  Get the array of POSIX pending close records for an open fsp. Caller must
139  free. Returns number of entries.
140 ****************************************************************************/
141
142 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
143 {
144         TDB_DATA kbuf = locking_key_fsp(fsp);
145         TDB_DATA dbuf;
146         size_t count = 0;
147
148         *entries = NULL;
149         dbuf.dptr = NULL;
150
151         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
152
153     if (!dbuf.dptr) {
154                 return 0;
155         }
156
157         *entries = (int *)dbuf.dptr;
158         count = (size_t)(dbuf.dsize / sizeof(int));
159
160         return count;
161 }
162
163 /****************************************************************************
164  Get the array of POSIX locks for an fsp. Caller must free. Returns
165  number of entries.
166 ****************************************************************************/
167
168 static size_t get_posix_lock_entries(files_struct *fsp, struct posix_lock **entries)
169 {
170         TDB_DATA kbuf = locking_key_fsp(fsp);
171         TDB_DATA dbuf;
172         size_t count = 0;
173
174         *entries = NULL;
175
176         dbuf.dptr = NULL;
177
178         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
179
180     if (!dbuf.dptr) {
181                 return 0;
182         }
183
184         *entries = (struct posix_lock *)dbuf.dptr;
185         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
186
187         return count;
188 }
189
190 /****************************************************************************
191  Deal with pending closes needed by POSIX locking support.
192  Note that posix_locking_close_file() is expected to have been called
193  to delete all locks on this fsp before this function is called.
194 ****************************************************************************/
195
196 int fd_close_posix(struct connection_struct *conn, files_struct *fsp)
197 {
198         int saved_errno = 0;
199         int ret;
200         size_t count, i;
201         struct posix_lock *entries = NULL;
202         int *fd_array = NULL;
203         BOOL locks_on_other_fds = False;
204
205         if (!lp_posix_locking(SNUM(conn))) {
206                 /*
207                  * No POSIX to worry about, just close.
208                  */
209                 ret = conn->vfs_ops.close(fsp,fsp->fd);
210                 fsp->fd = -1;
211                 return ret;
212         }
213
214         /*
215          * Get the number of outstanding POSIX locks on this dev/inode pair.
216          */
217
218         count = get_posix_lock_entries(fsp, &entries);
219
220         /*
221          * Check if there are any outstanding locks belonging to
222          * other fd's. This should never be the case if posix_locking_close_file()
223          * has been called first, but it never hurts to be *sure*.
224          */
225
226         for (i = 0; i < count; i++) {
227                 if (entries[i].fd != fsp->fd) {
228                         locks_on_other_fds = True;
229                         break;
230                 }
231         }
232
233         if (locks_on_other_fds) {
234
235                 /*
236                  * There are outstanding locks on this dev/inode pair on other fds.
237                  * Add our fd to the pending close tdb and set fsp->fd to -1.
238                  */
239
240                 if (!add_fd_to_close_entry(fsp)) {
241                         free((char *)entries);
242                         return False;
243                 }
244
245                 free((char *)entries);
246                 fsp->fd = -1;
247                 return 0;
248         }
249
250         if(entries)
251                 free((char *)entries);
252
253         /*
254          * No outstanding POSIX locks. Get the pending close fd's
255          * from the tdb and close them all.
256          */
257
258         count = get_posix_pending_close_entries(fsp, &fd_array);
259
260         if (count) {
261                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
262
263                 for(i = 0; i < count; i++) {
264                         if (conn->vfs_ops.close(fsp,fd_array[i]) == -1) {
265                                 saved_errno = errno;
266                         }
267                 }
268
269                 /*
270                  * Delete all fd's stored in the tdb
271                  * for this dev/inode pair.
272                  */
273
274                 delete_close_entries(fsp);
275         }
276
277         if (fd_array)
278                 free((char *)fd_array);
279
280         /*
281          * Finally close the fd associated with this fsp.
282          */
283
284         ret = conn->vfs_ops.close(fsp,fsp->fd);
285
286         if (saved_errno != 0) {
287         errno = saved_errno;
288                 ret = -1;
289     } 
290
291         fsp->fd = -1;
292
293         return ret;
294 }
295
/****************************************************************************
 Debugging aid: printable name for a POSIX lock type. F_RDLCK gives "READ",
 every other value gives "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK)
		return "READ";

	return "WRITE";
}
304
305 /****************************************************************************
306  Delete a POSIX lock entry by index number. Used if the tdb add succeeds, but
307  then the POSIX fcntl lock fails.
308 ****************************************************************************/
309
310 static BOOL delete_posix_lock_entry_by_index(files_struct *fsp, size_t entry)
311 {
312         TDB_DATA kbuf = locking_key_fsp(fsp);
313         TDB_DATA dbuf;
314         struct posix_lock *locks;
315         size_t count;
316
317         dbuf.dptr = NULL;
318         
319         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
320
321         if (!dbuf.dptr) {
322                 DEBUG(10,("delete_posix_lock_entry_by_index: tdb_fetch failed !\n"));
323                 goto fail;
324         }
325
326         count = (size_t)(dbuf.dsize / sizeof(struct posix_lock));
327         locks = (struct posix_lock *)dbuf.dptr;
328
329         if (count == 1) {
330                 tdb_delete(posix_lock_tdb, kbuf);
331         } else {
332                 if (entry < count-1) {
333                         memmove(&locks[entry], &locks[entry+1], sizeof(*locks)*((count-1) - entry));
334                 }
335                 dbuf.dsize -= sizeof(*locks);
336                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
337         }
338
339         free(dbuf.dptr);
340
341         return True;
342
343  fail:
344     if (dbuf.dptr)
345                 free(dbuf.dptr);
346     return False;
347 }
348
349 /****************************************************************************
350  Add an entry into the POSIX locking tdb. We return the index number of the
351  added lock (used in case we need to delete *exactly* this entry). Returns
352  False on fail, True on success.
353 ****************************************************************************/
354
355 static BOOL add_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, int lock_type, size_t *pentry_num)
356 {
357         TDB_DATA kbuf = locking_key_fsp(fsp);
358         TDB_DATA dbuf;
359         struct posix_lock pl;
360         char *tp;
361
362         dbuf.dptr = NULL;
363
364         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
365
366         *pentry_num = (size_t)(dbuf.dsize / sizeof(pl));
367
368         /*
369          * Add new record.
370          */
371
372         pl.fd = fsp->fd;
373         pl.start = start;
374         pl.size = size;
375         pl.lock_type = lock_type;
376
377         tp = Realloc(dbuf.dptr, dbuf.dsize + sizeof(pl));
378         if (!tp) {
379                 DEBUG(0,("add_posix_lock_entry: Realloc fail !\n"));
380                 goto fail;
381         }
382         else dbuf.dptr = tp;
383
384         memcpy(dbuf.dptr + dbuf.dsize, &pl, sizeof(pl));
385         dbuf.dsize += sizeof(pl);
386
387         if (tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
388                 DEBUG(0,("add_posix_lock: Failed to add lock entry on file %s\n", fsp->fsp_name));
389                 goto fail;
390         }
391
392     free(dbuf.dptr);
393
394         DEBUG(10,("add_posix_lock: File %s: type = %s: start=%.0f size=%.0f: dev=%.0f inode=%.0f\n",
395                         fsp->fsp_name, posix_lock_type_name(lock_type), (double)start, (double)size,
396                         (double)fsp->dev, (double)fsp->inode ));
397
398     return True;
399
400  fail:
401     if (dbuf.dptr)
402                 free(dbuf.dptr);
403     return False;
404 }
405
/****************************************************************************
 Calculate if locks have any overlap at all.

 Each lock covers the half-open byte range [start, start + size). Ranges
 that merely touch (one ends exactly where the other starts) share no byte
 and are therefore reported as not overlapping.
****************************************************************************/

static BOOL does_lock_overlap(SMB_OFF_T start1, SMB_OFF_T size1, SMB_OFF_T start2, SMB_OFF_T size2)
{
	/* Lock 1 starts inside lock 2. The end offset itself is excluded. */
	if (start1 >= start2 && start1 < start2 + size2)
		return True;

	/* Lock 1 starts before lock 2 and runs into it. */
	if (start1 < start2 && start1 + size1 > start2)
		return True;

	return False;
}
420
421 /****************************************************************************
422  Delete an entry from the POSIX locking tdb. Returns a copy of the entry being
423  deleted and the number of records that are overlapped by this one, or -1 on error.
424 ****************************************************************************/
425
426 static int delete_posix_lock_entry(files_struct *fsp, SMB_OFF_T start, SMB_OFF_T size, struct posix_lock *pl)
427 {
428         TDB_DATA kbuf = locking_key_fsp(fsp);
429         TDB_DATA dbuf;
430         struct posix_lock *locks;
431         size_t i, count;
432         BOOL found = False;
433         int num_overlapping_records = 0;
434
435         dbuf.dptr = NULL;
436         
437         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
438
439         if (!dbuf.dptr) {
440                 DEBUG(10,("delete_posix_lock_entry: tdb_fetch failed !\n"));
441                 goto fail;
442         }
443
444         /* There are existing locks - find a match. */
445         locks = (struct posix_lock *)dbuf.dptr;
446         count = (size_t)(dbuf.dsize / sizeof(*locks));
447
448         /*
449          * Search for and delete the first record that matches the
450          * unlock criteria.
451          */
452
453         for (i=0; i<count; i++) { 
454                 struct posix_lock *entry = &locks[i];
455
456                 if (entry->fd == fsp->fd &&
457                         entry->start == start &&
458                         entry->size == size) {
459
460                         /* Make a copy if requested. */
461                         if (pl)
462                                 *pl = *entry;
463
464                         /* Found it - delete it. */
465                         if (count == 1) {
466                                 tdb_delete(posix_lock_tdb, kbuf);
467                         } else {
468                                 if (i < count-1) {
469                                         memmove(&locks[i], &locks[i+1], sizeof(*locks)*((count-1) - i));
470                                 }
471                                 dbuf.dsize -= sizeof(*locks);
472                                 tdb_store(posix_lock_tdb, kbuf, dbuf, TDB_REPLACE);
473                         }
474                         count--;
475                         found = True;
476                         break;
477                 }
478         }
479
480         if (!found)
481                 goto fail;
482
483         /*
484          * Count the number of entries that are
485          * overlapped by this unlock request.
486          */
487
488         for (i = 0; i < count; i++) {
489                 struct posix_lock *entry = &locks[i];
490
491                 if (fsp->fd == entry->fd &&
492                         does_lock_overlap( start, size, entry->start, entry->size))
493                                 num_overlapping_records++;
494         }
495
496         DEBUG(10,("delete_posix_lock_entry: type = %s: start=%.0f size=%.0f, num_records = %d\n",
497                         posix_lock_type_name(pl->lock_type), (double)pl->start, (double)pl->size,
498                                 (unsigned int)num_overlapping_records ));
499
500     if (dbuf.dptr)
501                 free(dbuf.dptr);
502
503         return num_overlapping_records;
504
505  fail:
506     if (dbuf.dptr)
507                 free(dbuf.dptr);
508     return -1;
509 }
510
511 /****************************************************************************
512  Utility function to map a lock type correctly depending on the open
513  mode of a file.
514 ****************************************************************************/
515
516 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
517 {
518         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
519                 /*
520                  * Many UNIX's cannot get a write lock on a file opened read-only.
521                  * Win32 locking semantics allow this.
522                  * Do the best we can and attempt a read-only lock.
523                  */
524                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
525                 return F_RDLCK;
526         } else if((lock_type == READ_LOCK) && !fsp->can_read) {
527                 /*
528                  * Ditto for read locks on write only files.
529                  */
530                 DEBUG(10,("map_posix_lock_type: Changing read lock to write due to write-only file.\n"));
531                 return F_WRLCK;
532         }
533
534   /*
535    * This return should be the most normal, as we attempt
536    * to always open files read/write.
537    */
538
539   return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
540 }
541
/****************************************************************************
 Check to see if the given unsigned lock range is within the possible POSIX
 range. Stores the mapped range in *offset_out / *count_out and returns True
 if it can be represented. Returns False, leaving the outputs untouched,
 if the offset is out of range or no bytes remain to lock.

 An offset beyond the maximum positive lock offset cannot be mapped at all.
 A count that is too large is clamped so that the range ends at the maximum
 positive lock offset.
****************************************************************************/

static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
				SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
{
	SMB_OFF_T offset;
	SMB_OFF_T count;

	/*
	 * For the type of system we are, attempt to
	 * find the maximum positive lock offset as an SMB_OFF_T.
	 */

#if defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)

	/*
	 * In this case SMB_OFF_T is 64 bits,
	 * and the underlying system can handle 64 bit signed locks.
	 * The maximum is every bit set except the sign bit. It is built as
	 * (2^(bits-2) - 1) + 2^(bits-2) so that no shift ever reaches the
	 * sign bit.
	 */

	SMB_OFF_T half = ((SMB_OFF_T)1) << (SMB_OFF_T_BITS-2);
	SMB_OFF_T max_positive_lock_offset = (half - 1) + half;

#else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */

	/*
	 * In this case either SMB_OFF_T is 32 bits,
	 * or the underlying system cannot handle 64 bit signed locks.
	 * All offsets & counts must be 2^31 or less.
	 */

	SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;

#endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */

	/*
	 * If the given offset was > max_positive_lock_offset then we cannot map this at all
	 * ignore this lock.
	 */

	if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
		DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
				(double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
		return False;
	}

	/* The offset is now known to fit. */
	offset = (SMB_OFF_T)u_offset;

	/*
	 * Clamp (rather than bit-mask) an oversized count. Masking would
	 * turn e.g. 0x80000000 into 0 and silently drop the lock. This also
	 * covers the very common "count of all ones" (lock entire file) case.
	 */

	if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset))
		count = max_positive_lock_offset;
	else
		count = (SMB_OFF_T)u_count;

	/*
	 * Truncate count to end at max lock offset. Written as a subtraction
	 * so that offset + count is never formed and cannot overflow.
	 */

	if (count > max_positive_lock_offset - offset)
		count = max_positive_lock_offset - offset;

	/*
	 * If we ate all the count, ignore this lock.
	 */

	if (count == 0) {
		DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
				(double)u_offset, (double)u_count ));
		return False;
	}

	/*
	 * The mapping was successful.
	 */

	DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
			(double)offset, (double)count ));

	*offset_out = offset;
	*count_out = count;

	return True;
}
638
/****************************************************************************
 Pathetically try and map a 64 bit lock offset into 31 bits. I hate Windows :-).

 The significant bits of 'high' are moved to the top of a 31 bit value and
 or-ed with 'low'. Returns 0 (fail) if 'low' uses any of the bits needed
 for 'high', or if 'high' needs all 32 bits and so cannot fit at all.
****************************************************************************/

uint32 map_lock_offset(uint32 high, uint32 low)
{
	unsigned int high_bits = 0;
	uint32 highcopy = high;
	uint32 mask;

	/*
	 * Try and find out how many significant bits there are in high.
	 */

	while (highcopy) {
		highcopy >>= 1;
		high_bits++;
	}

	/*
	 * We use 31 bits not 32 here as POSIX
	 * lock offsets may not be negative.
	 * A 32 bit 'high' cannot be mapped, and would make the shift
	 * count below wrap around.
	 */

	if (high_bits > 31)
		return 0; /* Fail. */

	/* Shift an unsigned all-ones value: shifting (~0) shifts a negative int. */
	mask = (~(uint32)0) << (31 - high_bits);

	if (low & mask)
		return 0; /* Fail. */

	high <<= (31 - high_bits);

	return (high|low);
}
670
671 /****************************************************************************
672  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
673  broken NFS implementations.
674 ****************************************************************************/
675
676 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
677 {
678         int ret;
679         struct connection_struct *conn = fsp->conn;
680
681 #if defined(LARGE_SMB_OFF_T)
682         /*
683          * In the 64 bit locking case we store the original
684          * values in case we have to map to a 32 bit lock on
685          * a filesystem that doesn't support 64 bit locks.
686          */
687         SMB_OFF_T orig_offset = offset;
688         SMB_OFF_T orig_count = count;
689 #endif /* LARGE_SMB_OFF_T */
690
691         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fd,op,(double)offset,(double)count,type));
692
693         ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
694
695         if (!ret && (errno == EFBIG)) {
696                 if( DEBUGLVL( 0 )) {
697                         dbgtext("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n", (double)offset,(double)count);
698                         dbgtext("a 'file too large' error. This can happen when using 64 bit lock offsets\n");
699                         dbgtext("on 32 bit NFS mounted file systems. Retrying with 32 bit truncated length.\n");
700                 }
701                 /* 32 bit NFS file system, retry with smaller offset */
702                 errno = 0;
703                 count &= 0x7fffffff;
704                 ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
705         }
706
707         /* A lock query - just return. */
708         if (op == SMB_F_GETLK)
709                 return ret;
710
711         /* A lock set or unset. */
712         if (!ret) {
713                 DEBUG(3,("posix_fcntl_lock: lock failed at offset %.0f count %.0f op %d type %d (%s)\n",
714                                 (double)offset,(double)count,op,type,strerror(errno)));
715
716                 /* Perhaps it doesn't support this sort of locking ? */
717                 if (errno == EINVAL) {
718 #if defined(LARGE_SMB_OFF_T)
719                         {
720                                 /*
721                                  * Ok - if we get here then we have a 64 bit lock request
722                                  * that has returned EINVAL. Try and map to 31 bits for offset
723                                  * and length and try again. This may happen if a filesystem
724                                  * doesn't support 64 bit offsets (efs/ufs) although the underlying
725                                  * OS does.
726                                  */
727                                 uint32 off_low = (orig_offset & 0xFFFFFFFF);
728                                 uint32 off_high = ((orig_offset >> 32) & 0xFFFFFFFF);
729
730                                 count = (orig_count & 0x7FFFFFFF);
731                                 offset = (SMB_OFF_T)map_lock_offset(off_high, off_low);
732                                 ret = conn->vfs_ops.lock(fsp,fsp->fd,op,offset,count,type);
733                                 if (!ret) {
734                                         if (errno == EINVAL) {
735                                                 DEBUG(3,("posix_fcntl_lock: locking not supported? returning True\n"));
736                                                 return(True);
737                                         }
738                                         return False;
739                                 }
740                                 DEBUG(3,("posix_fcntl_lock: 64 -> 32 bit modified lock call successful\n"));
741                                 return True;
742                         }
743 #else /* LARGE_SMB_OFF_T */
744                         DEBUG(3,("locking not supported? returning True\n"));
745                         return(True);
746 #endif /* LARGE_SMB_OFF_T */
747                 }
748
749                 return(False);
750         }
751
752         DEBUG(8,("posix_fcntl_lock: Lock call successful\n"));
753
754         return(True);
755 }
756
757 /****************************************************************************
758  POSIX function to see if a file region is locked. Returns True if the
759  region is locked, False otherwise.
760 ****************************************************************************/
761
762 BOOL is_posix_locked(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
763 {
764         SMB_OFF_T offset;
765         SMB_OFF_T count;
766         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
767
768         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
769                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
770
771         /*
772          * If the requested lock won't fit in the POSIX range, we will
773          * never set it, so presume it is not locked.
774          */
775
776         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
777                 return False;
778
779         /*
780          * Note that most UNIX's can *test* for a write lock on
781          * a read-only fd, just not *set* a write lock on a read-only
782          * fd. So we don't need to use map_lock_type here.
783          */ 
784
785         return posix_fcntl_lock(fsp,SMB_F_GETLK,offset,count,posix_lock_type);
786 }
787
788 /*
789  * Structure used when splitting a lock range
790  * into a POSIX lock range. Doubly linked list.
791  */
792
793 struct lock_list {
794     struct lock_list *next;
795     struct lock_list *prev;
796     SMB_OFF_T start;
797     SMB_OFF_T size;
798 };
799
800 /****************************************************************************
801  Create a list of lock ranges that don't overlap a given range. Used in calculating
802  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
803  understand it :-).
804 ****************************************************************************/
805
806 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, struct lock_list *lhead, files_struct *fsp)
807 {
808         TDB_DATA kbuf = locking_key_fsp(fsp);
809         TDB_DATA dbuf;
810         struct posix_lock *locks;
811         size_t num_locks, i;
812
813         dbuf.dptr = NULL;
814
815         dbuf = tdb_fetch(posix_lock_tdb, kbuf);
816
817         if (!dbuf.dptr)
818                 return lhead;
819         
820         locks = (struct posix_lock *)dbuf.dptr;
821         num_locks = (size_t)(dbuf.dsize / sizeof(*locks));
822
823         /*
824          * Check the current lock list on this dev/inode pair.
825          * Quit if the list is deleted.
826          */
827
828         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
829                 (double)lhead->start, (double)lhead->size ));
830
831         for (i=0; i<num_locks && lhead; i++) {
832
833                 struct posix_lock *lock = &locks[i];
834                 struct lock_list *l_curr;
835
836                 /*
837                  * Walk the lock list, checking for overlaps. Note that
838                  * the lock list can expand within this loop if the current
839                  * range being examined needs to be split.
840                  */
841
842                 for (l_curr = lhead; l_curr;) {
843
844                         DEBUG(10,("posix_lock_list: lock: fd=%d: start=%.0f,size=%.0f:type=%s", lock->fd,
845                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
846
847                         if ( (l_curr->start >= (lock->start + lock->size)) ||
848                                  (lock->start >= (l_curr->start + l_curr->size))) {
849
850                                 /* No overlap with this lock - leave this range alone. */
851 /*********************************************
852                                              +---------+
853                                              | l_curr  |
854                                              +---------+
855                                 +-------+
856                                 | lock  |
857                                 +-------+
858 OR....
859              +---------+
860              |  l_curr |
861              +---------+
862 **********************************************/
863
864                                 DEBUG(10,("no overlap case.\n" ));
865
866                                 l_curr = l_curr->next;
867
868                         } else if ( (l_curr->start >= lock->start) &&
869                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
870
871                                 /*
872                                  * This unlock is completely overlapped by this existing lock range
873                                  * and thus should have no effect (not be unlocked). Delete it from the list.
874                                  */
875 /*********************************************
876                 +---------+
877                 |  l_curr |
878                 +---------+
879         +---------------------------+
880         |       lock                |
881         +---------------------------+
882 **********************************************/
883                                 /* Save the next pointer */
884                                 struct lock_list *ul_next = l_curr->next;
885
886                                 DEBUG(10,("delete case.\n" ));
887
888                                 DLIST_REMOVE(lhead, l_curr);
889                                 if(lhead == NULL)
890                                         break; /* No more list... */
891
892                                 l_curr = ul_next;
893                                 
894                         } else if ( (l_curr->start >= lock->start) &&
895                                                 (l_curr->start < lock->start + lock->size) &&
896                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
897
898                                 /*
899                                  * This unlock overlaps the existing lock range at the high end.
900                                  * Truncate by moving start to existing range end and reducing size.
901                                  */
902 /*********************************************
903                 +---------------+
904                 |  l_curr       |
905                 +---------------+
906         +---------------+
907         |    lock       |
908         +---------------+
909 BECOMES....
910                         +-------+
911                         | l_curr|
912                         +-------+
913 **********************************************/
914
915                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
916                                 l_curr->start = lock->start + lock->size;
917
918                                 DEBUG(10,("truncate high case: start=%.0f,size=%.0f\n",
919                                                                 (double)l_curr->start, (double)l_curr->size ));
920
921                                 l_curr = l_curr->next;
922
923                         } else if ( (l_curr->start < lock->start) &&
924                                                 (l_curr->start + l_curr->size > lock->start) &&
925                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
926
927                                 /*
928                                  * This unlock overlaps the existing lock range at the low end.
929                                  * Truncate by reducing size.
930                                  */
931 /*********************************************
932    +---------------+
933    |  l_curr       |
934    +---------------+
935            +---------------+
936            |    lock       |
937            +---------------+
938 BECOMES....
939    +-------+
940    | l_curr|
941    +-------+
942 **********************************************/
943
944                                 l_curr->size = lock->start - l_curr->start;
945
946                                 DEBUG(10,("truncate low case: start=%.0f,size=%.0f\n",
947                                                                 (double)l_curr->start, (double)l_curr->size ));
948
949                                 l_curr = l_curr->next;
950                 
951                         } else if ( (l_curr->start < lock->start) &&
952                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
953                                 /*
954                                  * Worst case scenario. Unlock request completely overlaps an existing
955                                  * lock range. Split the request into two, push the new (upper) request
956                                  * into the dlink list, and continue with the entry after ul_new (as we
957                                  * know that ul_new will not overlap with this lock).
958                                  */
959 /*********************************************
960         +---------------------------+
961         |        l_curr             |
962         +---------------------------+
963                 +---------+
964                 | lock    |
965                 +---------+
966 BECOMES.....
967         +-------+         +---------+
968         | l_curr|         | l_new   |
969         +-------+         +---------+
970 **********************************************/
971                                 struct lock_list *l_new = (struct lock_list *)talloc(ctx,
972                                                                                                         sizeof(struct lock_list));
973
974                                 if(l_new == NULL) {
975                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
976                                         return NULL; /* The talloc_destroy takes care of cleanup. */
977                                 }
978
979                                 ZERO_STRUCTP(l_new);
980                                 l_new->start = lock->start + lock->size;
981                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
982
983                                 /* Truncate the l_curr. */
984                                 l_curr->size = lock->start - l_curr->start;
985
986                                 DEBUG(10,("split case: curr: start=%.0f,size=%.0f \
987 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
988                                                                 (double)l_new->start, (double)l_new->size ));
989
990                                 /*
991                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
992                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
993                                  */
994
995                                 l_new->prev = l_curr;
996                                 l_new->next = l_curr->next;
997                                 l_curr->next = l_new;
998
999                                 /* And move after the link we added. */
1000                                 l_curr = l_new->next;
1001
1002                         } else {
1003
1004                                 /*
1005                                  * This logic case should never happen. Ensure this is the
1006                                  * case by forcing an abort.... Remove in production.
1007                                  */
1008                                 pstring msg;
1009
1010                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
1011 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
1012
1013                                 smb_panic(msg);
1014                         }
1015                 } /* end for ( l_curr = lhead; l_curr;) */
1016         } /* end for (i=0; i<num_locks && ul_head; i++) */
1017
1018         if (dbuf.dptr)
1019                 free(dbuf.dptr);
1020         
1021         return lhead;
1022 }
1023
1024 /****************************************************************************
1025  POSIX function to acquire a lock. Returns True if the
1026  lock could be granted, False if not.
1027 ****************************************************************************/
1028
1029 BOOL set_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count, enum brl_type lock_type)
1030 {
1031         SMB_OFF_T offset;
1032         SMB_OFF_T count;
1033         BOOL ret = True;
1034         size_t entry_num = 0;
1035         size_t lock_count;
1036         TALLOC_CTX *l_ctx = NULL;
1037         struct lock_list *llist = NULL;
1038         struct lock_list *ll = NULL;
1039         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1040
1041         DEBUG(5,("set_posix_lock: File %s, offset = %.0f, count = %.0f, type = %s\n",
1042                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
1043
1044         /*
1045          * If the requested lock won't fit in the POSIX range, we will
1046          * pretend it was successful.
1047          */
1048
1049         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1050                 return True;
1051
1052         /*
1053          * Windows is very strange. It allows read locks to be overlayed
1054          * (even over a write lock), but leaves the write lock in force until the first
1055          * unlock. It also reference counts the locks. This means the following sequence :
1056          *
1057          * process1                                      process2
1058          * ------------------------------------------------------------------------
1059          * WRITE LOCK : start = 2, len = 10
1060          *                                            READ LOCK: start =0, len = 10 - FAIL
1061          * READ LOCK : start = 0, len = 14 
1062          *                                            READ LOCK: start =0, len = 10 - FAIL
1063          * UNLOCK : start = 2, len = 10
1064          *                                            READ LOCK: start =0, len = 10 - OK
1065          *
1066          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1067          * would leave a single read lock over the 0-14 region. In order to
1068          * re-create Windows semantics mapped to POSIX locks, we create multiple TDB
1069          * entries, one for each overlayed lock request. We are guarenteed by the brlock
1070          * semantics that if a write lock is added, then it will be first in the array.
1071          */
1072         
1073         if ((l_ctx = talloc_init()) == NULL) {
1074                 DEBUG(0,("set_posix_lock: unable to init talloc context.\n"));
1075                 return True; /* Not a fatal error. */
1076         }
1077
1078         if ((ll = (struct lock_list *)talloc(l_ctx, sizeof(struct lock_list))) == NULL) {
1079                 DEBUG(0,("set_posix_lock: unable to talloc unlock list.\n"));
1080                 talloc_destroy(l_ctx);
1081                 return True; /* Not a fatal error. */
1082         }
1083
1084         /*
1085          * Create the initial list entry containing the
1086          * lock we want to add.
1087          */
1088
1089         ZERO_STRUCTP(ll);
1090         ll->start = offset;
1091         ll->size = count;
1092
1093         DLIST_ADD(llist, ll);
1094
1095         /*
1096          * The following call calculates if there are any
1097          * overlapping locks held by this process on
1098          * fd's open on the same file and splits this list
1099          * into a list of lock ranges that do not overlap with existing
1100          * POSIX locks.
1101          */
1102
1103         llist = posix_lock_list(l_ctx, llist, fsp);
1104
1105         /*
1106          * Now we have the list of ranges to lock it is safe to add the
1107          * entry into the POSIX lock tdb. We take note of the entry we
1108          * added here in case we have to remove it on POSIX lock fail.
1109          */
1110
1111         if (!add_posix_lock_entry(fsp,offset,count,posix_lock_type,&entry_num)) {
1112                 DEBUG(0,("set_posix_lock: Unable to create posix lock entry !\n"));
1113                 talloc_destroy(l_ctx);
1114                 return False;
1115         }
1116
1117         /*
1118          * Add the POSIX locks on the list of ranges returned.
1119          * As the lock is supposed to be added atomically, we need to
1120          * back out all the locks if any one of these calls fail.
1121          */
1122
1123         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1124                 offset = ll->start;
1125                 count = ll->size;
1126
1127                 DEBUG(5,("set_posix_lock: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1128                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1129
1130                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1131                         DEBUG(5,("set_posix_lock: Lock fail !: Type = %s: offset = %.0f, count = %.0f\n",
1132                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1133                         ret = False;
1134                         break;
1135                 }
1136         }
1137
1138         if (!ret) {
1139
1140                 /*
1141                  * Back out all the POSIX locks we have on fail.
1142                  */
1143
1144                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1145                         offset = ll->start;
1146                         count = ll->size;
1147
1148                         DEBUG(5,("set_posix_lock: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1149                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1150
1151                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1152                 }
1153
1154                 /*
1155                  * Remove the tdb entry for this lock.
1156                  */
1157
1158                 delete_posix_lock_entry_by_index(fsp,entry_num);
1159         }
1160
1161         talloc_destroy(l_ctx);
1162         return ret;
1163 }
1164
1165 /****************************************************************************
1166  POSIX function to release a lock. Returns True if the
1167  lock could be released, False if not.
1168 ****************************************************************************/
1169
1170 BOOL release_posix_lock(files_struct *fsp, SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
1171 {
1172         SMB_OFF_T offset;
1173         SMB_OFF_T count;
1174         BOOL ret = True;
1175         TALLOC_CTX *ul_ctx = NULL;
1176         struct lock_list *ulist = NULL;
1177         struct lock_list *ul = NULL;
1178         struct posix_lock deleted_lock;
1179         int num_overlapped_entries;
1180
1181         DEBUG(5,("release_posix_lock: File %s, offset = %.0f, count = %.0f\n",
1182                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1183
1184         /*
1185          * If the requested lock won't fit in the POSIX range, we will
1186          * pretend it was successful.
1187          */
1188
1189         if(!posix_lock_in_range(&offset, &count, u_offset, u_count))
1190                 return True;
1191
1192         /*
1193          * We treat this as one unlock request for POSIX accounting purposes even
1194          * if it may later be split into multiple smaller POSIX unlock ranges.
1195          * num_overlapped_entries is the number of existing locks that have any
1196          * overlap with this unlock request.
1197          */ 
1198
1199         num_overlapped_entries = delete_posix_lock_entry(fsp, offset, count, &deleted_lock);
1200
1201         if (num_overlapped_entries == -1) {
1202         smb_panic("release_posix_lock: unable find entry to delete !\n");
1203         }
1204
1205         /*
1206          * If num_overlapped_entries is > 0, and the lock_type we just deleted from the tdb was
1207          * a POSIX write lock, then before doing the unlock we need to downgrade
1208          * the POSIX lock to a read lock. This allows any overlapping read locks
1209          * to be atomically maintained.
1210          */
1211
1212         if (num_overlapped_entries > 0 && deleted_lock.lock_type == F_WRLCK) {
1213                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1214                         DEBUG(0,("release_posix_lock: downgrade of lock failed !\n"));
1215                         return False;
1216                 }
1217         }
1218
1219         if ((ul_ctx = talloc_init()) == NULL) {
1220                 DEBUG(0,("release_posix_lock: unable to init talloc context.\n"));
1221                 return True; /* Not a fatal error. */
1222         }
1223
1224         if ((ul = (struct lock_list *)talloc(ul_ctx, sizeof(struct lock_list))) == NULL) {
1225                 DEBUG(0,("release_posix_lock: unable to talloc unlock list.\n"));
1226                 talloc_destroy(ul_ctx);
1227                 return True; /* Not a fatal error. */
1228         }
1229
1230         /*
1231          * Create the initial list entry containing the
1232          * lock we want to remove.
1233          */
1234
1235         ZERO_STRUCTP(ul);
1236         ul->start = offset;
1237         ul->size = count;
1238
1239         DLIST_ADD(ulist, ul);
1240
1241         /*
1242          * The following call calculates if there are any
1243          * overlapping locks held by this process on
1244          * fd's open on the same file and creates a
1245          * list of unlock ranges that will allow
1246          * POSIX lock ranges to remain on the file whilst the
1247          * unlocks are performed.
1248          */
1249
1250         ulist = posix_lock_list(ul_ctx, ulist, fsp);
1251
1252         /*
1253          * Release the POSIX locks on the list of ranges returned.
1254          */
1255
1256         for(; ulist; ulist = ulist->next) {
1257                 offset = ulist->start;
1258                 count = ulist->size;
1259
1260                 DEBUG(5,("release_posix_lock: Real unlock: offset = %.0f, count = %.0f\n",
1261                         (double)offset, (double)count ));
1262
1263                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK))
1264                         ret = False;
1265         }
1266
1267         talloc_destroy(ul_ctx);
1268
1269         return ret;
1270 }
1271
1272 /****************************************************************************
1273  Remove all lock entries for a specific dev/inode pair from the tdb.
1274 ****************************************************************************/
1275
1276 static void delete_posix_lock_entries(files_struct *fsp)
1277 {
1278         TDB_DATA kbuf = locking_key_fsp(fsp);
1279
1280         if (tdb_delete(posix_lock_tdb, kbuf) == -1)
1281                 DEBUG(0,("delete_close_entries: tdb_delete fail !\n"));
1282 }
1283
1284 /****************************************************************************
1285  Debug function.
1286 ****************************************************************************/
1287
1288 static void dump_entry(struct posix_lock *pl)
1289 {
1290         DEBUG(10,("entry: start=%.0f, size=%.0f, type=%d, fd=%i\n",
1291                 (double)pl->start, (double)pl->size, (int)pl->lock_type, pl->fd ));
1292 }
1293
1294 /****************************************************************************
1295  Remove any locks on this fd. Called from file_close().
1296 ****************************************************************************/
1297
1298 void posix_locking_close_file(files_struct *fsp)
1299 {
1300         struct posix_lock *entries = NULL;
1301         size_t count, i;
1302
1303         /*
1304          * Optimization for the common case where we are the only
1305          * opener of a file. If all fd entries are our own, we don't
1306          * need to explicitly release all the locks via the POSIX functions,
1307          * we can just remove all the entries in the tdb and allow the
1308          * close to remove the real locks.
1309          */
1310
1311         count = get_posix_lock_entries(fsp, &entries);
1312
1313         if (count == 0) {
1314                 DEBUG(10,("posix_locking_close_file: file %s has no outstanding locks.\n", fsp->fsp_name ));
1315                 return;
1316         }
1317
1318         for (i = 0; i < count; i++) {
1319                 if (entries[i].fd != fsp->fd )
1320                         break;
1321
1322                 dump_entry(&entries[i]);
1323         }
1324
1325         if (i == count) {
1326                 /* All locks are ours. */
1327                 DEBUG(10,("posix_locking_close_file: file %s has %u outstanding locks, but all on one fd.\n", 
1328                         fsp->fsp_name, (unsigned int)count ));
1329                 free((char *)entries);
1330                 delete_posix_lock_entries(fsp);
1331                 return;
1332         }
1333
1334         /*
1335          * Difficult case. We need to delete all our locks, whilst leaving
1336          * all other POSIX locks in place.
1337          */
1338
1339         for (i = 0; i < count; i++) {
1340                 struct posix_lock *pl = &entries[i];
1341                 if (pl->fd == fsp->fd)
1342                         release_posix_lock(fsp, (SMB_BIG_UINT)pl->start, (SMB_BIG_UINT)pl->size );
1343         }
1344         free((char *)entries);
1345 }
1346
1347 /*******************************************************************
1348  Create the in-memory POSIX lock databases.
1349 ********************************************************************/
1350
1351 BOOL posix_locking_init(int read_only)
1352 {
1353         if (posix_lock_tdb && posix_pending_close_tdb)
1354                 return True;
1355         
1356         if (!posix_lock_tdb)
1357                 posix_lock_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1358                                           read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1359         if (!posix_lock_tdb) {
1360                 DEBUG(0,("Failed to open POSIX byte range locking database.\n"));
1361                 return False;
1362         }
1363         if (!posix_pending_close_tdb)
1364                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
1365                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
1366         if (!posix_pending_close_tdb) {
1367                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
1368                 return False;
1369         }
1370
1371         return True;
1372 }
1373
1374 /*******************************************************************
1375  Delete the in-memory POSIX lock databases.
1376 ********************************************************************/
1377
1378 BOOL posix_locking_end(void)
1379 {
1380     if (posix_lock_tdb && tdb_close(posix_lock_tdb) != 0)
1381                 return False;
1382     if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0)
1383                 return False;
1384         return True;
1385 }