r23183: Check in a change made by Tridge:
[kai/samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 2 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program; if not, write to the Free Software
18    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20    Revision History:
21
22    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
23 */
24
25 #include "includes.h"
26
27 #undef DBGC_CLASS
28 #define DBGC_CLASS DBGC_LOCKING
29
30 /*
31  * The pending close database handle.
32  */
33
34 static TDB_CONTEXT *posix_pending_close_tdb;
35
36 /****************************************************************************
37  First - the functions that deal with the underlying system locks - these
38  functions are used no matter if we're mapping CIFS Windows locks or CIFS
39  POSIX locks onto POSIX.
40 ****************************************************************************/
41
42 /****************************************************************************
43  Utility function to map a lock type correctly depending on the open
44  mode of a file.
45 ****************************************************************************/
46
47 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
48 {
49         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
50                 /*
51                  * Many UNIX's cannot get a write lock on a file opened read-only.
52                  * Win32 locking semantics allow this.
53                  * Do the best we can and attempt a read-only lock.
54                  */
55                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
56                 return F_RDLCK;
57         }
58
59         /*
60          * This return should be the most normal, as we attempt
61          * to always open files read/write.
62          */
63
64         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
65 }
66
/****************************************************************************
 Debugging aid: printable name for an fcntl() lock type.
 Returns "READ" for F_RDLCK and "WRITE" for any other value.
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
        if (lock_type == F_RDLCK) {
                return "READ";
        }
        return "WRITE";
}
75
76 /****************************************************************************
77  Check to see if the given unsigned lock range is within the possible POSIX
78  range. Modifies the given args to be in range if possible, just returns
79  False if not.
80 ****************************************************************************/
81
82 static BOOL posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
83                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
84 {
85         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
86         SMB_OFF_T count = (SMB_OFF_T)u_count;
87
88         /*
89          * For the type of system we are, attempt to
90          * find the maximum positive lock offset as an SMB_OFF_T.
91          */
92
93 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
94
95         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
96
97 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
98
99         /*
100          * In this case SMB_OFF_T is 64 bits,
101          * and the underlying system can handle 64 bit signed locks.
102          */
103
104         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
105         SMB_OFF_T mask = (mask2<<1);
106         SMB_OFF_T max_positive_lock_offset = ~mask;
107
108 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
109
110         /*
111          * In this case either SMB_OFF_T is 32 bits,
112          * or the underlying system cannot handle 64 bit signed locks.
113          * All offsets & counts must be 2^31 or less.
114          */
115
116         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
117
118 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
119
120         /*
121          * POSIX locks of length zero mean lock to end-of-file.
122          * Win32 locks of length zero are point probes. Ignore
123          * any Win32 locks of length zero. JRA.
124          */
125
126         if (count == (SMB_OFF_T)0) {
127                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
128                 return False;
129         }
130
131         /*
132          * If the given offset was > max_positive_lock_offset then we cannot map this at all
133          * ignore this lock.
134          */
135
136         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
137                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
138                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
139                 return False;
140         }
141
142         /*
143          * We must truncate the count to less than max_positive_lock_offset.
144          */
145
146         if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
147                 count = max_positive_lock_offset;
148         }
149
150         /*
151          * Truncate count to end at max lock offset.
152          */
153
154         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
155                 count = max_positive_lock_offset - offset;
156         }
157
158         /*
159          * If we ate all the count, ignore this lock.
160          */
161
162         if (count == 0) {
163                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
164                                 (double)u_offset, (double)u_count ));
165                 return False;
166         }
167
168         /*
169          * The mapping was successful.
170          */
171
172         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
173                         (double)offset, (double)count ));
174
175         *offset_out = offset;
176         *count_out = count;
177         
178         return True;
179 }
180
181 /****************************************************************************
182  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
183  broken NFS implementations.
184 ****************************************************************************/
185
186 static BOOL posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
187 {
188         BOOL ret;
189
190         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
191
192         ret = SMB_VFS_LOCK(fsp,fsp->fh->fd,op,offset,count,type);
193
194         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
195
196                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
197                                         (double)offset,(double)count));
198                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
199                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
200
201                 /*
202                  * If the offset is > 0x7FFFFFFF then this will cause problems on
203                  * 32 bit NFS mounted filesystems. Just ignore it.
204                  */
205
206                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
207                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
208                         return True;
209                 }
210
211                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
212                         /* 32 bit NFS file system, retry with smaller offset */
213                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
214                         errno = 0;
215                         count &= 0x7fffffff;
216                         ret = SMB_VFS_LOCK(fsp,fsp->fh->fd,op,offset,count,type);
217                 }
218         }
219
220         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
221         return ret;
222 }
223
224 /****************************************************************************
225  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
226  broken NFS implementations.
227 ****************************************************************************/
228
229 static BOOL posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
230 {
231         pid_t pid;
232         BOOL ret;
233
234         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
235                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
236
237         ret = SMB_VFS_GETLOCK(fsp,fsp->fh->fd,poffset,pcount,ptype,&pid);
238
239         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
240
241                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
242                                         (double)*poffset,(double)*pcount));
243                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
244                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
245
246                 /*
247                  * If the offset is > 0x7FFFFFFF then this will cause problems on
248                  * 32 bit NFS mounted filesystems. Just ignore it.
249                  */
250
251                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
252                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
253                         return True;
254                 }
255
256                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
257                         /* 32 bit NFS file system, retry with smaller offset */
258                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
259                         errno = 0;
260                         *pcount &= 0x7fffffff;
261                         ret = SMB_VFS_GETLOCK(fsp,fsp->fh->fd,poffset,pcount,ptype,&pid);
262                 }
263         }
264
265         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
266         return ret;
267 }
268
269 /****************************************************************************
270  POSIX function to see if a file region is locked. Returns True if the
271  region is locked, False otherwise.
272 ****************************************************************************/
273
274 BOOL is_posix_locked(files_struct *fsp,
275                         SMB_BIG_UINT *pu_offset,
276                         SMB_BIG_UINT *pu_count,
277                         enum brl_type *plock_type,
278                         enum brl_flavour lock_flav)
279 {
280         SMB_OFF_T offset;
281         SMB_OFF_T count;
282         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
283
284         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
285                 fsp->fsp_name, (double)*pu_offset, (double)*pu_count, posix_lock_type_name(*plock_type) ));
286
287         /*
288          * If the requested lock won't fit in the POSIX range, we will
289          * never set it, so presume it is not locked.
290          */
291
292         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
293                 return False;
294         }
295
296         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
297                 return False;
298         }
299
300         if (posix_lock_type == F_UNLCK) {
301                 return False;
302         }
303
304         if (lock_flav == POSIX_LOCK) {
305                 /* Only POSIX lock queries need to know the details. */
306                 *pu_offset = (SMB_BIG_UINT)offset;
307                 *pu_count = (SMB_BIG_UINT)count;
308                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
309         }
310         return True;
311 }
312
313 /****************************************************************************
314  Next - the functions that deal with in memory database storing representations
315  of either Windows CIFS locks or POSIX CIFS locks.
316 ****************************************************************************/
317
318 /* The key used in the in-memory POSIX databases. */
319
320 struct lock_ref_count_key {
321         struct file_id id;
322         char r;
323 }; 
324
325 /*******************************************************************
326  Form a static locking key for a dev/inode pair for the fd array.
327 ******************************************************************/
328
329 static TDB_DATA fd_array_key(struct file_id id)
330 {
331         static struct file_id key;
332         TDB_DATA kbuf;
333         key = id;
334         kbuf.dptr = (uint8 *)&key;
335         kbuf.dsize = sizeof(key);
336         return kbuf;
337 }
338
339 /*******************************************************************
340  Form a static locking key for a dev/inode pair for the lock ref count
341 ******************************************************************/
342
343 static TDB_DATA locking_ref_count_key(struct file_id id)
344 {
345         static struct lock_ref_count_key key;
346         TDB_DATA kbuf;
347
348         memset(&key, '\0', sizeof(key));
349         key.id = id;
350         key.r = 'r';
351         kbuf.dptr = (uint8 *)&key;
352         kbuf.dsize = sizeof(key);
353         return kbuf;
354 }
355
356 /*******************************************************************
357  Convenience function to get an fd_array key from an fsp.
358 ******************************************************************/
359
360 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
361 {
362         return fd_array_key(fsp->file_id);
363 }
364
365 /*******************************************************************
366  Convenience function to get a lock ref count key from an fsp.
367 ******************************************************************/
368
369 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp)
370 {
371         return locking_ref_count_key(fsp->file_id);
372 }
373
374 /*******************************************************************
375  Create the in-memory POSIX lock databases.
376 ********************************************************************/
377
378 BOOL posix_locking_init(int read_only)
379 {
380         if (posix_pending_close_tdb) {
381                 return True;
382         }
383         
384         if (!posix_pending_close_tdb) {
385                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
386                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
387         }
388         if (!posix_pending_close_tdb) {
389                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
390                 return False;
391         }
392
393         return True;
394 }
395
396 /*******************************************************************
397  Delete the in-memory POSIX lock databases.
398 ********************************************************************/
399
400 BOOL posix_locking_end(void)
401 {
402         if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0) {
403                 return False;
404         }
405         return True;
406 }
407
408 /****************************************************************************
409  Next - the functions that deal with storing fd's that have outstanding
410  POSIX locks when closed.
411 ****************************************************************************/
412
/****************************************************************************
 The pending close data in posix_pending_close_tdb is kept in two kinds of
 record, both derived from the file id (dev/ino pair) of the file:

 - The lock ref count record, keyed by a struct lock_ref_count_key (the file
   id followed by the tag byte 'r'), holds a single int: the number of
   outstanding locks on all open fd's on this dev/ino pair.

 - The fd array record, keyed by the bare file id, holds an array of ints:
   the fd's that were open on this dev/ino pair that should have been closed,
   but can't be as the lock ref count is non zero.
****************************************************************************/
421
422 /****************************************************************************
423  Keep a reference count of the number of Windows locks open on this dev/ino
424  pair. Creates entry if it doesn't exist.
425 ****************************************************************************/
426
427 static void increment_windows_lock_ref_count(files_struct *fsp)
428 {
429         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
430         TDB_DATA dbuf;
431         int lock_ref_count;
432
433         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
434         if (dbuf.dptr == NULL) {
435                 dbuf.dptr = (uint8 *)SMB_MALLOC_P(int);
436                 if (!dbuf.dptr) {
437                         smb_panic("increment_windows_lock_ref_count: malloc fail.\n");
438                 }
439                 memset(dbuf.dptr, '\0', sizeof(int));
440                 dbuf.dsize = sizeof(int);
441         }
442
443         memcpy(&lock_ref_count, dbuf.dptr, sizeof(int));
444         lock_ref_count++;
445         memcpy(dbuf.dptr, &lock_ref_count, sizeof(int));
446         
447         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
448                 smb_panic("increment_windows_lock_ref_count: tdb_store_fail.\n");
449         }
450         SAFE_FREE(dbuf.dptr);
451
452         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
453                 fsp->fsp_name, lock_ref_count ));
454 }
455
456 static void decrement_windows_lock_ref_count(files_struct *fsp)
457 {
458         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
459         TDB_DATA dbuf;
460         int lock_ref_count;
461
462         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
463         if (!dbuf.dptr) {
464                 smb_panic("decrement_windows_lock_ref_count: logic error.\n");
465         }
466
467         memcpy(&lock_ref_count, dbuf.dptr, sizeof(int));
468         lock_ref_count--;
469         memcpy(dbuf.dptr, &lock_ref_count, sizeof(int));
470
471         if (lock_ref_count < 0) {
472                 smb_panic("decrement_windows_lock_ref_count: lock_count logic error.\n");
473         }
474
475         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
476                 smb_panic("decrement_windows_lock_ref_count: tdb_store_fail.\n");
477         }
478         SAFE_FREE(dbuf.dptr);
479
480         DEBUG(10,("decrement_windows_lock_ref_count for file now %s = %d\n",
481                 fsp->fsp_name, lock_ref_count ));
482 }
483
484 /****************************************************************************
485  Bulk delete - subtract as many locks as we've just deleted.
486 ****************************************************************************/
487
488 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
489 {
490         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
491         TDB_DATA dbuf;
492         int lock_ref_count;
493
494         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
495         if (!dbuf.dptr) {
496                 return;
497         }
498
499         memcpy(&lock_ref_count, dbuf.dptr, sizeof(int));
500         lock_ref_count -= dcount;
501
502         if (lock_ref_count < 0) {
503                 smb_panic("reduce_windows_lock_ref_count: lock_count logic error.\n");
504         }
505         memcpy(dbuf.dptr, &lock_ref_count, sizeof(int));
506         
507         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
508                 smb_panic("reduce_windows_lock_ref_count: tdb_store_fail.\n");
509         }
510         SAFE_FREE(dbuf.dptr);
511
512         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
513                 fsp->fsp_name, lock_ref_count ));
514 }
515
516 /****************************************************************************
517  Fetch the lock ref count.
518 ****************************************************************************/
519
520 static int get_windows_lock_ref_count(files_struct *fsp)
521 {
522         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
523         TDB_DATA dbuf;
524         int lock_ref_count;
525
526         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
527         if (!dbuf.dptr) {
528                 lock_ref_count = 0;
529         } else {
530                 memcpy(&lock_ref_count, dbuf.dptr, sizeof(int));
531         }
532         SAFE_FREE(dbuf.dptr);
533
534         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
535                 fsp->fsp_name, lock_ref_count ));
536         return lock_ref_count;
537 }
538
539 /****************************************************************************
540  Delete a lock_ref_count entry.
541 ****************************************************************************/
542
543 static void delete_windows_lock_ref_count(files_struct *fsp)
544 {
545         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
546
547         /* Not a bug if it doesn't exist - no locks were ever granted. */
548         tdb_delete(posix_pending_close_tdb, kbuf);
549         DEBUG(10,("delete_windows_lock_ref_count for file %s\n", fsp->fsp_name));
550 }
551
552 /****************************************************************************
553  Add an fd to the pending close tdb.
554 ****************************************************************************/
555
556 static void add_fd_to_close_entry(files_struct *fsp)
557 {
558         TDB_DATA kbuf = fd_array_key_fsp(fsp);
559         TDB_DATA dbuf;
560
561         dbuf.dptr = NULL;
562         dbuf.dsize = 0;
563
564         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
565
566         dbuf.dptr = (uint8 *)SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(int));
567         if (!dbuf.dptr) {
568                 smb_panic("add_fd_to_close_entry: Realloc fail !\n");
569         }
570
571         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fh->fd, sizeof(int));
572         dbuf.dsize += sizeof(int);
573
574         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
575                 smb_panic("add_fd_to_close_entry: tdb_store_fail.\n");
576         }
577
578         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
579                 fsp->fh->fd, fsp->fsp_name ));
580
581         SAFE_FREE(dbuf.dptr);
582 }
583
584 /****************************************************************************
585  Remove all fd entries for a specific dev/inode pair from the tdb.
586 ****************************************************************************/
587
588 static void delete_close_entries(files_struct *fsp)
589 {
590         TDB_DATA kbuf = fd_array_key_fsp(fsp);
591
592         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1) {
593                 smb_panic("delete_close_entries: tdb_delete fail !\n");
594         }
595 }
596
597 /****************************************************************************
598  Get the array of POSIX pending close records for an open fsp. Caller must
599  free. Returns number of entries.
600 ****************************************************************************/
601
602 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
603 {
604         TDB_DATA kbuf = fd_array_key_fsp(fsp);
605         TDB_DATA dbuf;
606         size_t count = 0;
607
608         *entries = NULL;
609         dbuf.dptr = NULL;
610
611         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
612
613         if (!dbuf.dptr) {
614                 return 0;
615         }
616
617         *entries = (int *)dbuf.dptr;
618         count = (size_t)(dbuf.dsize / sizeof(int));
619
620         return count;
621 }
622
623 /****************************************************************************
624  Deal with pending closes needed by POSIX locking support.
625  Note that posix_locking_close_file() is expected to have been called
626  to delete all locks on this fsp before this function is called.
627 ****************************************************************************/
628
629 NTSTATUS fd_close_posix(struct connection_struct *conn, files_struct *fsp)
630 {
631         int saved_errno = 0;
632         int ret;
633         int *fd_array = NULL;
634         size_t count, i;
635
636         if (!lp_locking(fsp->conn->params) || !lp_posix_locking(conn->params)) {
637                 /*
638                  * No locking or POSIX to worry about or we want POSIX semantics
639                  * which will lose all locks on all fd's open on this dev/inode,
640                  * just close.
641                  */
642                 ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
643                 fsp->fh->fd = -1;
644                 return map_nt_error_from_unix(errno);
645         }
646
647         if (get_windows_lock_ref_count(fsp)) {
648
649                 /*
650                  * There are outstanding locks on this dev/inode pair on other fds.
651                  * Add our fd to the pending close tdb and set fsp->fh->fd to -1.
652                  */
653
654                 add_fd_to_close_entry(fsp);
655                 fsp->fh->fd = -1;
656                 return NT_STATUS_OK;
657         }
658
659         /*
660          * No outstanding locks. Get the pending close fd's
661          * from the tdb and close them all.
662          */
663
664         count = get_posix_pending_close_entries(fsp, &fd_array);
665
666         if (count) {
667                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
668
669                 for(i = 0; i < count; i++) {
670                         if (SMB_VFS_CLOSE(fsp,fd_array[i]) == -1) {
671                                 saved_errno = errno;
672                         }
673                 }
674
675                 /*
676                  * Delete all fd's stored in the tdb
677                  * for this dev/inode pair.
678                  */
679
680                 delete_close_entries(fsp);
681         }
682
683         SAFE_FREE(fd_array);
684
685         /* Don't need a lock ref count on this dev/ino anymore. */
686         delete_windows_lock_ref_count(fsp);
687
688         /*
689          * Finally close the fd associated with this fsp.
690          */
691
692         ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
693
694         if (ret == 0 && saved_errno != 0) {
695                 errno = saved_errno;
696                 ret = -1;
697         } 
698
699         fsp->fh->fd = -1;
700
701         if (ret == -1) {
702                 return map_nt_error_from_unix(errno);
703         }
704
705         return NT_STATUS_OK;
706 }
707
708 /****************************************************************************
709  Next - the functions that deal with the mapping CIFS Windows locks onto
710  the underlying system POSIX locks.
711 ****************************************************************************/
712
713 /*
714  * Structure used when splitting a lock range
715  * into a POSIX lock range. Doubly linked list.
716  */
717
718 struct lock_list {
719         struct lock_list *next;
720         struct lock_list *prev;
721         SMB_OFF_T start;
722         SMB_OFF_T size;
723 };
724
725 /****************************************************************************
726  Create a list of lock ranges that don't overlap a given range. Used in calculating
727  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
728  understand it :-).
729 ****************************************************************************/
730
731 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
732                                                 struct lock_list *lhead,
733                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
734                                                 files_struct *fsp,
735                                                 const struct lock_struct *plocks,
736                                                 int num_locks)
737 {
738         int i;
739
740         /*
741          * Check the current lock list on this dev/inode pair.
742          * Quit if the list is deleted.
743          */
744
745         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
746                 (double)lhead->start, (double)lhead->size ));
747
748         for (i=0; i<num_locks && lhead; i++) {
749                 const struct lock_struct *lock = &plocks[i];
750                 struct lock_list *l_curr;
751
752                 /* Ignore all but read/write locks. */
753                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
754                         continue;
755                 }
756
757                 /* Ignore locks not owned by this process. */
758                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
759                         continue;
760                 }
761
762                 /*
763                  * Walk the lock list, checking for overlaps. Note that
764                  * the lock list can expand within this loop if the current
765                  * range being examined needs to be split.
766                  */
767
768                 for (l_curr = lhead; l_curr;) {
769
770                         DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum,
771                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
772
773                         if ( (l_curr->start >= (lock->start + lock->size)) ||
774                                  (lock->start >= (l_curr->start + l_curr->size))) {
775
776                                 /* No overlap with existing lock - leave this range alone. */
777 /*********************************************
778                                              +---------+
779                                              | l_curr  |
780                                              +---------+
781                                 +-------+
782                                 | lock  |
783                                 +-------+
784 OR....
785              +---------+
786              |  l_curr |
787              +---------+
788 **********************************************/
789
790                                 DEBUG(10,(" no overlap case.\n" ));
791
792                                 l_curr = l_curr->next;
793
794                         } else if ( (l_curr->start >= lock->start) &&
795                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
796
797                                 /*
798                                  * This range is completely overlapped by this existing lock range
799                                  * and thus should have no effect. Delete it from the list.
800                                  */
801 /*********************************************
802                 +---------+
803                 |  l_curr |
804                 +---------+
805         +---------------------------+
806         |       lock                |
807         +---------------------------+
808 **********************************************/
809                                 /* Save the next pointer */
810                                 struct lock_list *ul_next = l_curr->next;
811
812                                 DEBUG(10,(" delete case.\n" ));
813
814                                 DLIST_REMOVE(lhead, l_curr);
815                                 if(lhead == NULL) {
816                                         break; /* No more list... */
817                                 }
818
819                                 l_curr = ul_next;
820                                 
821                         } else if ( (l_curr->start >= lock->start) &&
822                                                 (l_curr->start < lock->start + lock->size) &&
823                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
824
825                                 /*
826                                  * This range overlaps the existing lock range at the high end.
827                                  * Truncate by moving start to existing range end and reducing size.
828                                  */
829 /*********************************************
830                 +---------------+
831                 |  l_curr       |
832                 +---------------+
833         +---------------+
834         |    lock       |
835         +---------------+
836 BECOMES....
837                         +-------+
838                         | l_curr|
839                         +-------+
840 **********************************************/
841
842                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
843                                 l_curr->start = lock->start + lock->size;
844
845                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
846                                                                 (double)l_curr->start, (double)l_curr->size ));
847
848                                 l_curr = l_curr->next;
849
850                         } else if ( (l_curr->start < lock->start) &&
851                                                 (l_curr->start + l_curr->size > lock->start) &&
852                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
853
854                                 /*
855                                  * This range overlaps the existing lock range at the low end.
856                                  * Truncate by reducing size.
857                                  */
858 /*********************************************
859    +---------------+
860    |  l_curr       |
861    +---------------+
862            +---------------+
863            |    lock       |
864            +---------------+
865 BECOMES....
866    +-------+
867    | l_curr|
868    +-------+
869 **********************************************/
870
871                                 l_curr->size = lock->start - l_curr->start;
872
873                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
874                                                                 (double)l_curr->start, (double)l_curr->size ));
875
876                                 l_curr = l_curr->next;
877                 
878                         } else if ( (l_curr->start < lock->start) &&
879                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
880                                 /*
881                                  * Worst case scenario. Range completely overlaps an existing
882                                  * lock range. Split the request into two, push the new (upper) request
883                                  * into the dlink list, and continue with the entry after l_new (as we
884                                  * know that l_new will not overlap with this lock).
885                                  */
886 /*********************************************
887         +---------------------------+
888         |        l_curr             |
889         +---------------------------+
890                 +---------+
891                 | lock    |
892                 +---------+
893 BECOMES.....
894         +-------+         +---------+
895         | l_curr|         | l_new   |
896         +-------+         +---------+
897 **********************************************/
898                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
899
900                                 if(l_new == NULL) {
901                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
902                                         return NULL; /* The talloc_destroy takes care of cleanup. */
903                                 }
904
905                                 ZERO_STRUCTP(l_new);
906                                 l_new->start = lock->start + lock->size;
907                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
908
909                                 /* Truncate the l_curr. */
910                                 l_curr->size = lock->start - l_curr->start;
911
912                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
913 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
914                                                                 (double)l_new->start, (double)l_new->size ));
915
916                                 /*
917                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
918                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
919                                  */
920
921                                 l_new->prev = l_curr;
922                                 l_new->next = l_curr->next;
923                                 l_curr->next = l_new;
924
925                                 /* And move after the link we added. */
926                                 l_curr = l_new->next;
927
928                         } else {
929
930                                 /*
931                                  * This logic case should never happen. Ensure this is the
932                                  * case by forcing an abort.... Remove in production.
933                                  */
934                                 pstring msg;
935
936                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
937 lock: start = %.0f, size = %.0f\n", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
938
939                                 smb_panic(msg);
940                         }
941                 } /* end for ( l_curr = lhead; l_curr;) */
942         } /* end for (i=0; i<num_locks && ul_head; i++) */
943
944         return lhead;
945 }
946
947 /****************************************************************************
948  POSIX function to acquire a lock. Returns True if the
949  lock could be granted, False if not.
950 ****************************************************************************/
951
952 BOOL set_posix_lock_windows_flavour(files_struct *fsp,
953                         SMB_BIG_UINT u_offset,
954                         SMB_BIG_UINT u_count,
955                         enum brl_type lock_type,
956                         const struct lock_context *lock_ctx,
957                         const struct lock_struct *plocks,
958                         int num_locks,
959                         int *errno_ret)
960 {
961         SMB_OFF_T offset;
962         SMB_OFF_T count;
963         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
964         BOOL ret = True;
965         size_t lock_count;
966         TALLOC_CTX *l_ctx = NULL;
967         struct lock_list *llist = NULL;
968         struct lock_list *ll = NULL;
969
970         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, count = %.0f, type = %s\n",
971                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
972
973         /*
974          * If the requested lock won't fit in the POSIX range, we will
975          * pretend it was successful.
976          */
977
978         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
979                 increment_windows_lock_ref_count(fsp);
980                 return True;
981         }
982
983         /*
984          * Windows is very strange. It allows read locks to be overlayed
985          * (even over a write lock), but leaves the write lock in force until the first
986          * unlock. It also reference counts the locks. This means the following sequence :
987          *
988          * process1                                      process2
989          * ------------------------------------------------------------------------
990          * WRITE LOCK : start = 2, len = 10
991          *                                            READ LOCK: start =0, len = 10 - FAIL
992          * READ LOCK : start = 0, len = 14 
993          *                                            READ LOCK: start =0, len = 10 - FAIL
994          * UNLOCK : start = 2, len = 10
995          *                                            READ LOCK: start =0, len = 10 - OK
996          *
997          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
998          * would leave a single read lock over the 0-14 region.
999          */
1000         
1001         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1002                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
1003                 return False;
1004         }
1005
1006         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
1007                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1008                 talloc_destroy(l_ctx);
1009                 return False;
1010         }
1011
1012         /*
1013          * Create the initial list entry containing the
1014          * lock we want to add.
1015          */
1016
1017         ZERO_STRUCTP(ll);
1018         ll->start = offset;
1019         ll->size = count;
1020
1021         DLIST_ADD(llist, ll);
1022
1023         /*
1024          * The following call calculates if there are any
1025          * overlapping locks held by this process on
1026          * fd's open on the same file and splits this list
1027          * into a list of lock ranges that do not overlap with existing
1028          * POSIX locks.
1029          */
1030
1031         llist = posix_lock_list(l_ctx,
1032                                 llist,
1033                                 lock_ctx, /* Lock context llist belongs to. */
1034                                 fsp,
1035                                 plocks,
1036                                 num_locks);
1037
1038         /*
1039          * Add the POSIX locks on the list of ranges returned.
1040          * As the lock is supposed to be added atomically, we need to
1041          * back out all the locks if any one of these calls fail.
1042          */
1043
1044         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1045                 offset = ll->start;
1046                 count = ll->size;
1047
1048                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1049                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1050
1051                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1052                         *errno_ret = errno;
1053                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1054                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1055                         ret = False;
1056                         break;
1057                 }
1058         }
1059
1060         if (!ret) {
1061
1062                 /*
1063                  * Back out all the POSIX locks we have on fail.
1064                  */
1065
1066                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1067                         offset = ll->start;
1068                         count = ll->size;
1069
1070                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1071                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1072
1073                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1074                 }
1075         } else {
1076                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1077                 increment_windows_lock_ref_count(fsp);
1078         }
1079
1080         talloc_destroy(l_ctx);
1081         return ret;
1082 }
1083
1084 /****************************************************************************
1085  POSIX function to release a lock. Returns True if the
1086  lock could be released, False if not.
1087 ****************************************************************************/
1088
1089 BOOL release_posix_lock_windows_flavour(files_struct *fsp,
1090                                 SMB_BIG_UINT u_offset,
1091                                 SMB_BIG_UINT u_count,
1092                                 enum brl_type deleted_lock_type,
1093                                 const struct lock_context *lock_ctx,
1094                                 const struct lock_struct *plocks,
1095                                 int num_locks)
1096 {
1097         SMB_OFF_T offset;
1098         SMB_OFF_T count;
1099         BOOL ret = True;
1100         TALLOC_CTX *ul_ctx = NULL;
1101         struct lock_list *ulist = NULL;
1102         struct lock_list *ul = NULL;
1103
1104         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, count = %.0f\n",
1105                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1106
1107         /* Remember the number of Windows locks we have on this dev/ino pair. */
1108         decrement_windows_lock_ref_count(fsp);
1109
1110         /*
1111          * If the requested lock won't fit in the POSIX range, we will
1112          * pretend it was successful.
1113          */
1114
1115         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1116                 return True;
1117         }
1118
1119         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1120                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1121                 return False;
1122         }
1123
1124         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1125                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1126                 talloc_destroy(ul_ctx);
1127                 return False;
1128         }
1129
1130         /*
1131          * Create the initial list entry containing the
1132          * lock we want to remove.
1133          */
1134
1135         ZERO_STRUCTP(ul);
1136         ul->start = offset;
1137         ul->size = count;
1138
1139         DLIST_ADD(ulist, ul);
1140
1141         /*
1142          * The following call calculates if there are any
1143          * overlapping locks held by this process on
1144          * fd's open on the same file and creates a
1145          * list of unlock ranges that will allow
1146          * POSIX lock ranges to remain on the file whilst the
1147          * unlocks are performed.
1148          */
1149
1150         ulist = posix_lock_list(ul_ctx,
1151                                 ulist,
1152                                 lock_ctx, /* Lock context ulist belongs to. */
1153                                 fsp,
1154                                 plocks,
1155                                 num_locks);
1156
1157         /*
1158          * If there were any overlapped entries (list is > 1 or size or start have changed),
1159          * and the lock_type we just deleted from
1160          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1161          * the POSIX lock to a read lock. This allows any overlapping read locks
1162          * to be atomically maintained.
1163          */
1164
1165         if (deleted_lock_type == WRITE_LOCK &&
1166                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1167
1168                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1169                         (double)offset, (double)count ));
1170
1171                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1172                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1173                         talloc_destroy(ul_ctx);
1174                         return False;
1175                 }
1176         }
1177
1178         /*
1179          * Release the POSIX locks on the list of ranges returned.
1180          */
1181
1182         for(; ulist; ulist = ulist->next) {
1183                 offset = ulist->start;
1184                 count = ulist->size;
1185
1186                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1187                         (double)offset, (double)count ));
1188
1189                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1190                         ret = False;
1191                 }
1192         }
1193
1194         talloc_destroy(ul_ctx);
1195         return ret;
1196 }
1197
1198 /****************************************************************************
1199  Next - the functions that deal with mapping CIFS POSIX locks onto
1200  the underlying system POSIX locks.
1201 ****************************************************************************/
1202
1203 /****************************************************************************
1204  POSIX function to acquire a lock. Returns True if the
1205  lock could be granted, False if not.
1206  As POSIX locks don't stack or conflict (they just overwrite)
1207  we can map the requested lock directly onto a system one. We
1208  know it doesn't conflict with locks on other contexts as the
1209  upper layer would have refused it.
1210 ****************************************************************************/
1211
1212 BOOL set_posix_lock_posix_flavour(files_struct *fsp,
1213                         SMB_BIG_UINT u_offset,
1214                         SMB_BIG_UINT u_count,
1215                         enum brl_type lock_type,
1216                         int *errno_ret)
1217 {
1218         SMB_OFF_T offset;
1219         SMB_OFF_T count;
1220         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1221
1222         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count = %.0f, type = %s\n",
1223                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
1224
1225         /*
1226          * If the requested lock won't fit in the POSIX range, we will
1227          * pretend it was successful.
1228          */
1229
1230         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1231                 return True;
1232         }
1233
1234         if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1235                 *errno_ret = errno;
1236                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1237                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1238                 return False;
1239         }
1240         return True;
1241 }
1242
1243 /****************************************************************************
1244  POSIX function to release a lock. Returns True if the
1245  lock could be released, False if not.
1246  We are given a complete lock state from the upper layer which is what the lock
1247  state should be after the unlock has already been done, so what
1248  we do is punch out holes in the unlock range where locks owned by this process
1249  have a different lock context.
1250 ****************************************************************************/
1251
1252 BOOL release_posix_lock_posix_flavour(files_struct *fsp,
1253                                 SMB_BIG_UINT u_offset,
1254                                 SMB_BIG_UINT u_count,
1255                                 const struct lock_context *lock_ctx,
1256                                 const struct lock_struct *plocks,
1257                                 int num_locks)
1258 {
1259         BOOL ret = True;
1260         SMB_OFF_T offset;
1261         SMB_OFF_T count;
1262         TALLOC_CTX *ul_ctx = NULL;
1263         struct lock_list *ulist = NULL;
1264         struct lock_list *ul = NULL;
1265
1266         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, count = %.0f\n",
1267                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1268
1269         /*
1270          * If the requested lock won't fit in the POSIX range, we will
1271          * pretend it was successful.
1272          */
1273
1274         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1275                 return True;
1276         }
1277
1278         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1279                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1280                 return False;
1281         }
1282
1283         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1284                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1285                 talloc_destroy(ul_ctx);
1286                 return False;
1287         }
1288
1289         /*
1290          * Create the initial list entry containing the
1291          * lock we want to remove.
1292          */
1293
1294         ZERO_STRUCTP(ul);
1295         ul->start = offset;
1296         ul->size = count;
1297
1298         DLIST_ADD(ulist, ul);
1299
1300         /*
1301          * Walk the given array creating a linked list
1302          * of unlock requests.
1303          */
1304
1305         ulist = posix_lock_list(ul_ctx,
1306                                 ulist,
1307                                 lock_ctx, /* Lock context ulist belongs to. */
1308                                 fsp,
1309                                 plocks,
1310                                 num_locks);
1311
1312         /*
1313          * Release the POSIX locks on the list of ranges returned.
1314          */
1315
1316         for(; ulist; ulist = ulist->next) {
1317                 offset = ulist->start;
1318                 count = ulist->size;
1319
1320                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1321                         (double)offset, (double)count ));
1322
1323                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1324                         ret = False;
1325                 }
1326         }
1327
1328         talloc_destroy(ul_ctx);
1329         return ret;
1330 }