135e204f014e082ea460a748965631bb28201d67
[samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19    Revision History:
20
21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
22 */
23
24 #include "includes.h"
25
26 #undef DBGC_CLASS
27 #define DBGC_CLASS DBGC_LOCKING
28
29 /*
30  * The pending close database handle. Opened by posix_locking_init() and closed by posix_locking_end(); holds the lock ref count and pending-close fd records.
31  */
32
33 static TDB_CONTEXT *posix_pending_close_tdb;
34
35 /****************************************************************************
36  First - the functions that deal with the underlying system locks - these
37  functions are used no matter if we're mapping CIFS Windows locks or CIFS
38  POSIX locks onto POSIX.
39 ****************************************************************************/
40
41 /****************************************************************************
42  Utility function to map a lock type correctly depending on the open
43  mode of a file.
44 ****************************************************************************/
45
46 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
47 {
48         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
49                 /*
50                  * Many UNIX's cannot get a write lock on a file opened read-only.
51                  * Win32 locking semantics allow this.
52                  * Do the best we can and attempt a read-only lock.
53                  */
54                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
55                 return F_RDLCK;
56         }
57
58         /*
59          * This return should be the most normal, as we attempt
60          * to always open files read/write.
61          */
62
63         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
64 }
65
66 /****************************************************************************
67  Debugging aid :-).
68 ****************************************************************************/
69
/*
 * lock_type - a POSIX lock type (F_RDLCK, F_WRLCK, ...).
 *
 * Returns "READ" for F_RDLCK and "WRITE" for every other value.
 */
static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}
	return "WRITE";
}
74
75 /****************************************************************************
76  Check to see if the given unsigned lock range is within the possible POSIX
77  range. Modifies the given args to be in range if possible, just returns
78  False if not.
79 ****************************************************************************/
80
81 static bool posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
82                                 SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count)
83 {
84         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
85         SMB_OFF_T count = (SMB_OFF_T)u_count;
86
87         /*
88          * For the type of system we are, attempt to
89          * find the maximum positive lock offset as an SMB_OFF_T.
90          */
91
92 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
93
94         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
95
96 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
97
98         /*
99          * In this case SMB_OFF_T is 64 bits,
100          * and the underlying system can handle 64 bit signed locks.
101          */
102
103         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
104         SMB_OFF_T mask = (mask2<<1);
105         SMB_OFF_T max_positive_lock_offset = ~mask;
106
107 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
108
109         /*
110          * In this case either SMB_OFF_T is 32 bits,
111          * or the underlying system cannot handle 64 bit signed locks.
112          * All offsets & counts must be 2^31 or less.
113          */
114
115         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
116
117 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
118
119         /*
120          * POSIX locks of length zero mean lock to end-of-file.
121          * Win32 locks of length zero are point probes. Ignore
122          * any Win32 locks of length zero. JRA.
123          */
124
125         if (count == (SMB_OFF_T)0) {
126                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
127                 return False;
128         }
129
130         /*
131          * If the given offset was > max_positive_lock_offset then we cannot map this at all
132          * ignore this lock.
133          */
134
135         if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
136                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
137                                 (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) ));
138                 return False;
139         }
140
141         /*
142          * We must truncate the count to less than max_positive_lock_offset.
143          */
144
145         if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset)) {
146                 count = max_positive_lock_offset;
147         }
148
149         /*
150          * Truncate count to end at max lock offset.
151          */
152
153         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
154                 count = max_positive_lock_offset - offset;
155         }
156
157         /*
158          * If we ate all the count, ignore this lock.
159          */
160
161         if (count == 0) {
162                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
163                                 (double)u_offset, (double)u_count ));
164                 return False;
165         }
166
167         /*
168          * The mapping was successful.
169          */
170
171         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
172                         (double)offset, (double)count ));
173
174         *offset_out = offset;
175         *count_out = count;
176         
177         return True;
178 }
179
180 /****************************************************************************
181  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
182  broken NFS implementations.
183 ****************************************************************************/
184
185 static bool posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
186 {
187         bool ret;
188
189         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
190
191         ret = SMB_VFS_LOCK(fsp,fsp->fh->fd,op,offset,count,type);
192
193         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
194
195                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
196                                         (double)offset,(double)count));
197                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
198                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
199
200                 /*
201                  * If the offset is > 0x7FFFFFFF then this will cause problems on
202                  * 32 bit NFS mounted filesystems. Just ignore it.
203                  */
204
205                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
206                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
207                         return True;
208                 }
209
210                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
211                         /* 32 bit NFS file system, retry with smaller offset */
212                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
213                         errno = 0;
214                         count &= 0x7fffffff;
215                         ret = SMB_VFS_LOCK(fsp,fsp->fh->fd,op,offset,count,type);
216                 }
217         }
218
219         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
220         return ret;
221 }
222
223 /****************************************************************************
224  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
225  broken NFS implementations.
226 ****************************************************************************/
227
228 static bool posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
229 {
230         pid_t pid;
231         bool ret;
232
233         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
234                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
235
236         ret = SMB_VFS_GETLOCK(fsp,fsp->fh->fd,poffset,pcount,ptype,&pid);
237
238         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
239
240                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
241                                         (double)*poffset,(double)*pcount));
242                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
243                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
244
245                 /*
246                  * If the offset is > 0x7FFFFFFF then this will cause problems on
247                  * 32 bit NFS mounted filesystems. Just ignore it.
248                  */
249
250                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
251                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
252                         return True;
253                 }
254
255                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
256                         /* 32 bit NFS file system, retry with smaller offset */
257                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
258                         errno = 0;
259                         *pcount &= 0x7fffffff;
260                         ret = SMB_VFS_GETLOCK(fsp,fsp->fh->fd,poffset,pcount,ptype,&pid);
261                 }
262         }
263
264         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
265         return ret;
266 }
267
268 /****************************************************************************
269  POSIX function to see if a file region is locked. Returns True if the
270  region is locked, False otherwise.
271 ****************************************************************************/
272
273 bool is_posix_locked(files_struct *fsp,
274                         SMB_BIG_UINT *pu_offset,
275                         SMB_BIG_UINT *pu_count,
276                         enum brl_type *plock_type,
277                         enum brl_flavour lock_flav)
278 {
279         SMB_OFF_T offset;
280         SMB_OFF_T count;
281         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
282
283         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n",
284                 fsp->fsp_name, (double)*pu_offset, (double)*pu_count, posix_lock_type_name(*plock_type) ));
285
286         /*
287          * If the requested lock won't fit in the POSIX range, we will
288          * never set it, so presume it is not locked.
289          */
290
291         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
292                 return False;
293         }
294
295         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
296                 return False;
297         }
298
299         if (posix_lock_type == F_UNLCK) {
300                 return False;
301         }
302
303         if (lock_flav == POSIX_LOCK) {
304                 /* Only POSIX lock queries need to know the details. */
305                 *pu_offset = (SMB_BIG_UINT)offset;
306                 *pu_count = (SMB_BIG_UINT)count;
307                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
308         }
309         return True;
310 }
311
312 /****************************************************************************
313  Next - the functions that deal with in memory database storing representations
314  of either Windows CIFS locks or POSIX CIFS locks.
315 ****************************************************************************/
316
317 /* Key of the lock ref count record in the in-memory POSIX database: the file_id plus a tag byte, so it differs in size from the bare file_id key used for the fd array. */
318
319 struct lock_ref_count_key {
320         struct file_id id; /* identifies the file (dev/inode pair) */
321         char r; /* tag byte; locking_ref_count_key() sets it to 'r' */
322 }; 
323
324 /*******************************************************************
325  Form a static locking key for a dev/inode pair for the fd array.
326 ******************************************************************/
327
328 static TDB_DATA fd_array_key(struct file_id id)
329 {
330         static struct file_id key;
331         TDB_DATA kbuf;
332         key = id;
333         kbuf.dptr = (uint8 *)&key;
334         kbuf.dsize = sizeof(key);
335         return kbuf;
336 }
337
338 /*******************************************************************
339  Form a static locking key for a dev/inode pair for the lock ref count
340 ******************************************************************/
341
342 static TDB_DATA locking_ref_count_key(struct file_id id)
343 {
344         static struct lock_ref_count_key key;
345         TDB_DATA kbuf;
346
347         memset(&key, '\0', sizeof(key));
348         key.id = id;
349         key.r = 'r';
350         kbuf.dptr = (uint8 *)&key;
351         kbuf.dsize = sizeof(key);
352         return kbuf;
353 }
354
355 /*******************************************************************
356  Convenience function to get an fd_array key from an fsp.
357 ******************************************************************/
358
359 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
360 {
361         return fd_array_key(fsp->file_id);
362 }
363
364 /*******************************************************************
365  Convenience function to get a lock ref count key from an fsp.
366 ******************************************************************/
367
368 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp)
369 {
370         return locking_ref_count_key(fsp->file_id);
371 }
372
373 /*******************************************************************
374  Create the in-memory POSIX lock databases.
375 ********************************************************************/
376
377 bool posix_locking_init(int read_only)
378 {
379         if (posix_pending_close_tdb) {
380                 return True;
381         }
382         
383         if (!posix_pending_close_tdb) {
384                 posix_pending_close_tdb = tdb_open_log(NULL, 0, TDB_INTERNAL,
385                                                    read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644);
386         }
387         if (!posix_pending_close_tdb) {
388                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
389                 return False;
390         }
391
392         return True;
393 }
394
395 /*******************************************************************
396  Delete the in-memory POSIX lock databases.
397 ********************************************************************/
398
399 bool posix_locking_end(void)
400 {
401         if (posix_pending_close_tdb && tdb_close(posix_pending_close_tdb) != 0) {
402                 return False;
403         }
404         return True;
405 }
406
407 /****************************************************************************
408  Next - the functions that deal with storing fd's that have outstanding
409  POSIX locks when closed.
410 ****************************************************************************/
411
412 /****************************************************************************
413  The records in posix_pending_close_tdb come in two kinds, both derived from
414  the dev/ino pair (struct file_id).
415  The lock ref count record, keyed by the file_id plus an 'r' tag, holds one
416  int: the number of outstanding locks on all open fd's on this dev/ino pair.
417  The fd array record, keyed by the bare file_id, holds the fd's that were open
418  on this dev/ino pair that should have been closed, but can't as the lock ref count is non zero.
419 ****************************************************************************/
420
421 /****************************************************************************
422  Keep a reference count of the number of Windows locks open on this dev/ino
423  pair. Creates entry if it doesn't exist.
424 ****************************************************************************/
425
426 static void increment_windows_lock_ref_count(files_struct *fsp)
427 {
428         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
429         TDB_DATA dbuf;
430         int lock_ref_count;
431
432         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
433         if (dbuf.dptr == NULL) {
434                 dbuf.dptr = (uint8 *)SMB_MALLOC_P(int);
435                 if (!dbuf.dptr) {
436                         smb_panic("increment_windows_lock_ref_count: malloc fail");
437                 }
438                 memset(dbuf.dptr, '\0', sizeof(int));
439                 dbuf.dsize = sizeof(int);
440         }
441
442         memcpy(&lock_ref_count, dbuf.dptr, sizeof(int));
443         lock_ref_count++;
444         memcpy(dbuf.dptr, &lock_ref_count, sizeof(int));
445         
446         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
447                 smb_panic("increment_windows_lock_ref_count: tdb_store_fail");
448         }
449         SAFE_FREE(dbuf.dptr);
450
451         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
452                 fsp->fsp_name, lock_ref_count ));
453 }
454
455 static void decrement_windows_lock_ref_count(files_struct *fsp)
456 {
457         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
458         TDB_DATA dbuf;
459         int lock_ref_count;
460
461         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
462         if (!dbuf.dptr) {
463                 smb_panic("decrement_windows_lock_ref_count: logic error");
464         }
465
466         memcpy(&lock_ref_count, dbuf.dptr, sizeof(int));
467         lock_ref_count--;
468         memcpy(dbuf.dptr, &lock_ref_count, sizeof(int));
469
470         if (lock_ref_count < 0) {
471                 smb_panic("decrement_windows_lock_ref_count: lock_count logic error");
472         }
473
474         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
475                 smb_panic("decrement_windows_lock_ref_count: tdb_store_fail");
476         }
477         SAFE_FREE(dbuf.dptr);
478
479         DEBUG(10,("decrement_windows_lock_ref_count for file now %s = %d\n",
480                 fsp->fsp_name, lock_ref_count ));
481 }
482
483 /****************************************************************************
484  Bulk delete - subtract as many locks as we've just deleted.
485 ****************************************************************************/
486
487 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
488 {
489         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
490         TDB_DATA dbuf;
491         int lock_ref_count;
492
493         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
494         if (!dbuf.dptr) {
495                 return;
496         }
497
498         memcpy(&lock_ref_count, dbuf.dptr, sizeof(int));
499         lock_ref_count -= dcount;
500
501         if (lock_ref_count < 0) {
502                 smb_panic("reduce_windows_lock_ref_count: lock_count logic error");
503         }
504         memcpy(dbuf.dptr, &lock_ref_count, sizeof(int));
505         
506         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
507                 smb_panic("reduce_windows_lock_ref_count: tdb_store_fail");
508         }
509         SAFE_FREE(dbuf.dptr);
510
511         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
512                 fsp->fsp_name, lock_ref_count ));
513 }
514
515 /****************************************************************************
516  Fetch the lock ref count.
517 ****************************************************************************/
518
519 static int get_windows_lock_ref_count(files_struct *fsp)
520 {
521         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
522         TDB_DATA dbuf;
523         int lock_ref_count;
524
525         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
526         if (!dbuf.dptr) {
527                 lock_ref_count = 0;
528         } else {
529                 memcpy(&lock_ref_count, dbuf.dptr, sizeof(int));
530         }
531         SAFE_FREE(dbuf.dptr);
532
533         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
534                 fsp->fsp_name, lock_ref_count ));
535         return lock_ref_count;
536 }
537
538 /****************************************************************************
539  Delete a lock_ref_count entry.
540 ****************************************************************************/
541
542 static void delete_windows_lock_ref_count(files_struct *fsp)
543 {
544         TDB_DATA kbuf = locking_ref_count_key_fsp(fsp);
545
546         /* Not a bug if it doesn't exist - no locks were ever granted. */
547         tdb_delete(posix_pending_close_tdb, kbuf);
548         DEBUG(10,("delete_windows_lock_ref_count for file %s\n", fsp->fsp_name));
549 }
550
551 /****************************************************************************
552  Add an fd to the pending close tdb.
553 ****************************************************************************/
554
555 static void add_fd_to_close_entry(files_struct *fsp)
556 {
557         TDB_DATA kbuf = fd_array_key_fsp(fsp);
558         TDB_DATA dbuf;
559
560         dbuf.dptr = NULL;
561         dbuf.dsize = 0;
562
563         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
564
565         dbuf.dptr = (uint8 *)SMB_REALLOC(dbuf.dptr, dbuf.dsize + sizeof(int));
566         if (!dbuf.dptr) {
567                 smb_panic("add_fd_to_close_entry: SMB_REALLOC failed");
568         }
569
570         memcpy(dbuf.dptr + dbuf.dsize, &fsp->fh->fd, sizeof(int));
571         dbuf.dsize += sizeof(int);
572
573         if (tdb_store(posix_pending_close_tdb, kbuf, dbuf, TDB_REPLACE) == -1) {
574                 smb_panic("add_fd_to_close_entry: tdb_store_fail");
575         }
576
577         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
578                 fsp->fh->fd, fsp->fsp_name ));
579
580         SAFE_FREE(dbuf.dptr);
581 }
582
583 /****************************************************************************
584  Remove all fd entries for a specific dev/inode pair from the tdb.
585 ****************************************************************************/
586
587 static void delete_close_entries(files_struct *fsp)
588 {
589         TDB_DATA kbuf = fd_array_key_fsp(fsp);
590
591         if (tdb_delete(posix_pending_close_tdb, kbuf) == -1) {
592                 smb_panic("delete_close_entries: tdb_delete failed");
593         }
594 }
595
596 /****************************************************************************
597  Get the array of POSIX pending close records for an open fsp. Caller must
598  free. Returns number of entries.
599 ****************************************************************************/
600
601 static size_t get_posix_pending_close_entries(files_struct *fsp, int **entries)
602 {
603         TDB_DATA kbuf = fd_array_key_fsp(fsp);
604         TDB_DATA dbuf;
605         size_t count = 0;
606
607         *entries = NULL;
608         dbuf.dptr = NULL;
609
610         dbuf = tdb_fetch(posix_pending_close_tdb, kbuf);
611
612         if (!dbuf.dptr) {
613                 return 0;
614         }
615
616         *entries = (int *)dbuf.dptr;
617         count = (size_t)(dbuf.dsize / sizeof(int));
618
619         return count;
620 }
621
622 /****************************************************************************
623  Deal with pending closes needed by POSIX locking support.
624  Note that posix_locking_close_file() is expected to have been called
625  to delete all locks on this fsp before this function is called.
626 ****************************************************************************/
627
628 NTSTATUS fd_close_posix(struct connection_struct *conn, files_struct *fsp)
629 {
630         int saved_errno = 0;
631         int ret;
632         int *fd_array = NULL;
633         size_t count, i;
634
635         if (!lp_locking(fsp->conn->params) || !lp_posix_locking(conn->params)) {
636                 /*
637                  * No locking or POSIX to worry about or we want POSIX semantics
638                  * which will lose all locks on all fd's open on this dev/inode,
639                  * just close.
640                  */
641                 ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
642                 fsp->fh->fd = -1;
643                 if (ret == -1) {
644                         return map_nt_error_from_unix(errno);
645                 }
646                 return NT_STATUS_OK;
647         }
648
649         if (get_windows_lock_ref_count(fsp)) {
650
651                 /*
652                  * There are outstanding locks on this dev/inode pair on other fds.
653                  * Add our fd to the pending close tdb and set fsp->fh->fd to -1.
654                  */
655
656                 add_fd_to_close_entry(fsp);
657                 fsp->fh->fd = -1;
658                 return NT_STATUS_OK;
659         }
660
661         /*
662          * No outstanding locks. Get the pending close fd's
663          * from the tdb and close them all.
664          */
665
666         count = get_posix_pending_close_entries(fsp, &fd_array);
667
668         if (count) {
669                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", (unsigned int)count ));
670
671                 for(i = 0; i < count; i++) {
672                         if (SMB_VFS_CLOSE(fsp,fd_array[i]) == -1) {
673                                 saved_errno = errno;
674                         }
675                 }
676
677                 /*
678                  * Delete all fd's stored in the tdb
679                  * for this dev/inode pair.
680                  */
681
682                 delete_close_entries(fsp);
683         }
684
685         SAFE_FREE(fd_array);
686
687         /* Don't need a lock ref count on this dev/ino anymore. */
688         delete_windows_lock_ref_count(fsp);
689
690         /*
691          * Finally close the fd associated with this fsp.
692          */
693
694         ret = SMB_VFS_CLOSE(fsp,fsp->fh->fd);
695
696         if (ret == 0 && saved_errno != 0) {
697                 errno = saved_errno;
698                 ret = -1;
699         } 
700
701         fsp->fh->fd = -1;
702
703         if (ret == -1) {
704                 return map_nt_error_from_unix(errno);
705         }
706
707         return NT_STATUS_OK;
708 }
709
710 /****************************************************************************
711  Next - the functions that deal with the mapping CIFS Windows locks onto
712  the underlying system POSIX locks.
713 ****************************************************************************/
714
715 /*
716  * Structure used when splitting a lock range
717  * into a POSIX lock range. Doubly linked list; each node is one byte range.
718  */
719
720 struct lock_list {
721         struct lock_list *next; /* following node in the list */
722         struct lock_list *prev; /* preceding node in the list */
723         SMB_OFF_T start; /* first byte offset of this range */
724         SMB_OFF_T size; /* length of this range in bytes */
725 };
726
727 /****************************************************************************
728  Create a list of lock ranges that don't overlap a given range. Used in calculating
729  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
730  understand it :-).
731 ****************************************************************************/
732
733 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
734                                                 struct lock_list *lhead,
735                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
736                                                 files_struct *fsp,
737                                                 const struct lock_struct *plocks,
738                                                 int num_locks)
739 {
740         int i;
741
742         /*
743          * Check the current lock list on this dev/inode pair.
744          * Quit if the list is deleted.
745          */
746
747         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
748                 (double)lhead->start, (double)lhead->size ));
749
750         for (i=0; i<num_locks && lhead; i++) {
751                 const struct lock_struct *lock = &plocks[i];
752                 struct lock_list *l_curr;
753
754                 /* Ignore all but read/write locks. */
755                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
756                         continue;
757                 }
758
759                 /* Ignore locks not owned by this process. */
760                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
761                         continue;
762                 }
763
764                 /*
765                  * Walk the lock list, checking for overlaps. Note that
766                  * the lock list can expand within this loop if the current
767                  * range being examined needs to be split.
768                  */
769
770                 for (l_curr = lhead; l_curr;) {
771
772                         DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum,
773                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
774
775                         if ( (l_curr->start >= (lock->start + lock->size)) ||
776                                  (lock->start >= (l_curr->start + l_curr->size))) {
777
778                                 /* No overlap with existing lock - leave this range alone. */
779 /*********************************************
780                                              +---------+
781                                              | l_curr  |
782                                              +---------+
783                                 +-------+
784                                 | lock  |
785                                 +-------+
786 OR....
787              +---------+
788              |  l_curr |
789              +---------+
790 **********************************************/
791
792                                 DEBUG(10,(" no overlap case.\n" ));
793
794                                 l_curr = l_curr->next;
795
796                         } else if ( (l_curr->start >= lock->start) &&
797                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
798
799                                 /*
800                                  * This range is completely overlapped by this existing lock range
801                                  * and thus should have no effect. Delete it from the list.
802                                  */
803 /*********************************************
804                 +---------+
805                 |  l_curr |
806                 +---------+
807         +---------------------------+
808         |       lock                |
809         +---------------------------+
810 **********************************************/
811                                 /* Save the next pointer */
812                                 struct lock_list *ul_next = l_curr->next;
813
814                                 DEBUG(10,(" delete case.\n" ));
815
816                                 DLIST_REMOVE(lhead, l_curr);
817                                 if(lhead == NULL) {
818                                         break; /* No more list... */
819                                 }
820
821                                 l_curr = ul_next;
822                                 
823                         } else if ( (l_curr->start >= lock->start) &&
824                                                 (l_curr->start < lock->start + lock->size) &&
825                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
826
827                                 /*
828                                  * This range overlaps the existing lock range at the high end.
829                                  * Truncate by moving start to existing range end and reducing size.
830                                  */
831 /*********************************************
832                 +---------------+
833                 |  l_curr       |
834                 +---------------+
835         +---------------+
836         |    lock       |
837         +---------------+
838 BECOMES....
839                         +-------+
840                         | l_curr|
841                         +-------+
842 **********************************************/
843
844                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
845                                 l_curr->start = lock->start + lock->size;
846
847                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
848                                                                 (double)l_curr->start, (double)l_curr->size ));
849
850                                 l_curr = l_curr->next;
851
852                         } else if ( (l_curr->start < lock->start) &&
853                                                 (l_curr->start + l_curr->size > lock->start) &&
854                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
855
856                                 /*
857                                  * This range overlaps the existing lock range at the low end.
858                                  * Truncate by reducing size.
859                                  */
860 /*********************************************
861    +---------------+
862    |  l_curr       |
863    +---------------+
864            +---------------+
865            |    lock       |
866            +---------------+
867 BECOMES....
868    +-------+
869    | l_curr|
870    +-------+
871 **********************************************/
872
873                                 l_curr->size = lock->start - l_curr->start;
874
875                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
876                                                                 (double)l_curr->start, (double)l_curr->size ));
877
878                                 l_curr = l_curr->next;
879                 
880                         } else if ( (l_curr->start < lock->start) &&
881                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
882                                 /*
883                                  * Worst case scenario. Range completely overlaps an existing
884                                  * lock range. Split the request into two, push the new (upper) request
885                                  * into the dlink list, and continue with the entry after l_new (as we
886                                  * know that l_new will not overlap with this lock).
887                                  */
888 /*********************************************
889         +---------------------------+
890         |        l_curr             |
891         +---------------------------+
892                 +---------+
893                 | lock    |
894                 +---------+
895 BECOMES.....
896         +-------+         +---------+
897         | l_curr|         | l_new   |
898         +-------+         +---------+
899 **********************************************/
900                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
901
902                                 if(l_new == NULL) {
903                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
904                                         return NULL; /* The talloc_destroy takes care of cleanup. */
905                                 }
906
907                                 ZERO_STRUCTP(l_new);
908                                 l_new->start = lock->start + lock->size;
909                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
910
911                                 /* Truncate the l_curr. */
912                                 l_curr->size = lock->start - l_curr->start;
913
914                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
915 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
916                                                                 (double)l_new->start, (double)l_new->size ));
917
918                                 /*
919                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
920                                  * Note we can't use DLINK_ADD here as this inserts at the head of the given list.
921                                  */
922
923                                 l_new->prev = l_curr;
924                                 l_new->next = l_curr->next;
925                                 l_curr->next = l_new;
926
927                                 /* And move after the link we added. */
928                                 l_curr = l_new->next;
929
930                         } else {
931
932                                 /*
933                                  * This logic case should never happen. Ensure this is the
934                                  * case by forcing an abort.... Remove in production.
935                                  */
936                                 pstring msg;
937
938                                 slprintf(msg, sizeof(msg)-1, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
939 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
940
941                                 smb_panic(msg);
942                         }
943                 } /* end for ( l_curr = lhead; l_curr;) */
944         } /* end for (i=0; i<num_locks && ul_head; i++) */
945
946         return lhead;
947 }
948
949 /****************************************************************************
950  POSIX function to acquire a lock. Returns True if the
951  lock could be granted, False if not.
952 ****************************************************************************/
953
954 bool set_posix_lock_windows_flavour(files_struct *fsp,
955                         SMB_BIG_UINT u_offset,
956                         SMB_BIG_UINT u_count,
957                         enum brl_type lock_type,
958                         const struct lock_context *lock_ctx,
959                         const struct lock_struct *plocks,
960                         int num_locks,
961                         int *errno_ret)
962 {
963         SMB_OFF_T offset;
964         SMB_OFF_T count;
965         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
966         bool ret = True;
967         size_t lock_count;
968         TALLOC_CTX *l_ctx = NULL;
969         struct lock_list *llist = NULL;
970         struct lock_list *ll = NULL;
971
972         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, count = %.0f, type = %s\n",
973                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
974
975         /*
976          * If the requested lock won't fit in the POSIX range, we will
977          * pretend it was successful.
978          */
979
980         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
981                 increment_windows_lock_ref_count(fsp);
982                 return True;
983         }
984
985         /*
986          * Windows is very strange. It allows read locks to be overlayed
987          * (even over a write lock), but leaves the write lock in force until the first
988          * unlock. It also reference counts the locks. This means the following sequence :
989          *
990          * process1                                      process2
991          * ------------------------------------------------------------------------
992          * WRITE LOCK : start = 2, len = 10
993          *                                            READ LOCK: start =0, len = 10 - FAIL
994          * READ LOCK : start = 0, len = 14 
995          *                                            READ LOCK: start =0, len = 10 - FAIL
996          * UNLOCK : start = 2, len = 10
997          *                                            READ LOCK: start =0, len = 10 - OK
998          *
999          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1000          * would leave a single read lock over the 0-14 region.
1001          */
1002         
1003         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1004                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
1005                 return False;
1006         }
1007
1008         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
1009                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1010                 talloc_destroy(l_ctx);
1011                 return False;
1012         }
1013
1014         /*
1015          * Create the initial list entry containing the
1016          * lock we want to add.
1017          */
1018
1019         ZERO_STRUCTP(ll);
1020         ll->start = offset;
1021         ll->size = count;
1022
1023         DLIST_ADD(llist, ll);
1024
1025         /*
1026          * The following call calculates if there are any
1027          * overlapping locks held by this process on
1028          * fd's open on the same file and splits this list
1029          * into a list of lock ranges that do not overlap with existing
1030          * POSIX locks.
1031          */
1032
1033         llist = posix_lock_list(l_ctx,
1034                                 llist,
1035                                 lock_ctx, /* Lock context llist belongs to. */
1036                                 fsp,
1037                                 plocks,
1038                                 num_locks);
1039
1040         /*
1041          * Add the POSIX locks on the list of ranges returned.
1042          * As the lock is supposed to be added atomically, we need to
1043          * back out all the locks if any one of these calls fail.
1044          */
1045
1046         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1047                 offset = ll->start;
1048                 count = ll->size;
1049
1050                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1051                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1052
1053                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1054                         *errno_ret = errno;
1055                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1056                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1057                         ret = False;
1058                         break;
1059                 }
1060         }
1061
1062         if (!ret) {
1063
1064                 /*
1065                  * Back out all the POSIX locks we have on fail.
1066                  */
1067
1068                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1069                         offset = ll->start;
1070                         count = ll->size;
1071
1072                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1073                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1074
1075                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1076                 }
1077         } else {
1078                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1079                 increment_windows_lock_ref_count(fsp);
1080         }
1081
1082         talloc_destroy(l_ctx);
1083         return ret;
1084 }
1085
1086 /****************************************************************************
1087  POSIX function to release a lock. Returns True if the
1088  lock could be released, False if not.
1089 ****************************************************************************/
1090
1091 bool release_posix_lock_windows_flavour(files_struct *fsp,
1092                                 SMB_BIG_UINT u_offset,
1093                                 SMB_BIG_UINT u_count,
1094                                 enum brl_type deleted_lock_type,
1095                                 const struct lock_context *lock_ctx,
1096                                 const struct lock_struct *plocks,
1097                                 int num_locks)
1098 {
1099         SMB_OFF_T offset;
1100         SMB_OFF_T count;
1101         bool ret = True;
1102         TALLOC_CTX *ul_ctx = NULL;
1103         struct lock_list *ulist = NULL;
1104         struct lock_list *ul = NULL;
1105
1106         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, count = %.0f\n",
1107                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1108
1109         /* Remember the number of Windows locks we have on this dev/ino pair. */
1110         decrement_windows_lock_ref_count(fsp);
1111
1112         /*
1113          * If the requested lock won't fit in the POSIX range, we will
1114          * pretend it was successful.
1115          */
1116
1117         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1118                 return True;
1119         }
1120
1121         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1122                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1123                 return False;
1124         }
1125
1126         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1127                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1128                 talloc_destroy(ul_ctx);
1129                 return False;
1130         }
1131
1132         /*
1133          * Create the initial list entry containing the
1134          * lock we want to remove.
1135          */
1136
1137         ZERO_STRUCTP(ul);
1138         ul->start = offset;
1139         ul->size = count;
1140
1141         DLIST_ADD(ulist, ul);
1142
1143         /*
1144          * The following call calculates if there are any
1145          * overlapping locks held by this process on
1146          * fd's open on the same file and creates a
1147          * list of unlock ranges that will allow
1148          * POSIX lock ranges to remain on the file whilst the
1149          * unlocks are performed.
1150          */
1151
1152         ulist = posix_lock_list(ul_ctx,
1153                                 ulist,
1154                                 lock_ctx, /* Lock context ulist belongs to. */
1155                                 fsp,
1156                                 plocks,
1157                                 num_locks);
1158
1159         /*
1160          * If there were any overlapped entries (list is > 1 or size or start have changed),
1161          * and the lock_type we just deleted from
1162          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1163          * the POSIX lock to a read lock. This allows any overlapping read locks
1164          * to be atomically maintained.
1165          */
1166
1167         if (deleted_lock_type == WRITE_LOCK &&
1168                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1169
1170                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1171                         (double)offset, (double)count ));
1172
1173                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1174                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1175                         talloc_destroy(ul_ctx);
1176                         return False;
1177                 }
1178         }
1179
1180         /*
1181          * Release the POSIX locks on the list of ranges returned.
1182          */
1183
1184         for(; ulist; ulist = ulist->next) {
1185                 offset = ulist->start;
1186                 count = ulist->size;
1187
1188                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1189                         (double)offset, (double)count ));
1190
1191                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1192                         ret = False;
1193                 }
1194         }
1195
1196         talloc_destroy(ul_ctx);
1197         return ret;
1198 }
1199
1200 /****************************************************************************
1201  Next - the functions that deal with mapping CIFS POSIX locks onto
1202  the underlying system POSIX locks.
1203 ****************************************************************************/
1204
1205 /****************************************************************************
1206  POSIX function to acquire a lock. Returns True if the
1207  lock could be granted, False if not.
1208  As POSIX locks don't stack or conflict (they just overwrite)
1209  we can map the requested lock directly onto a system one. We
1210  know it doesn't conflict with locks on other contexts as the
1211  upper layer would have refused it.
1212 ****************************************************************************/
1213
1214 bool set_posix_lock_posix_flavour(files_struct *fsp,
1215                         SMB_BIG_UINT u_offset,
1216                         SMB_BIG_UINT u_count,
1217                         enum brl_type lock_type,
1218                         int *errno_ret)
1219 {
1220         SMB_OFF_T offset;
1221         SMB_OFF_T count;
1222         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1223
1224         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count = %.0f, type = %s\n",
1225                         fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
1226
1227         /*
1228          * If the requested lock won't fit in the POSIX range, we will
1229          * pretend it was successful.
1230          */
1231
1232         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1233                 return True;
1234         }
1235
1236         if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1237                 *errno_ret = errno;
1238                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1239                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1240                 return False;
1241         }
1242         return True;
1243 }
1244
1245 /****************************************************************************
1246  POSIX function to release a lock. Returns True if the
1247  lock could be released, False if not.
1248  We are given a complete lock state from the upper layer which is what the lock
1249  state should be after the unlock has already been done, so what
1250  we do is punch out holes in the unlock range where locks owned by this process
1251  have a different lock context.
1252 ****************************************************************************/
1253
1254 bool release_posix_lock_posix_flavour(files_struct *fsp,
1255                                 SMB_BIG_UINT u_offset,
1256                                 SMB_BIG_UINT u_count,
1257                                 const struct lock_context *lock_ctx,
1258                                 const struct lock_struct *plocks,
1259                                 int num_locks)
1260 {
1261         bool ret = True;
1262         SMB_OFF_T offset;
1263         SMB_OFF_T count;
1264         TALLOC_CTX *ul_ctx = NULL;
1265         struct lock_list *ulist = NULL;
1266         struct lock_list *ul = NULL;
1267
1268         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, count = %.0f\n",
1269                 fsp->fsp_name, (double)u_offset, (double)u_count ));
1270
1271         /*
1272          * If the requested lock won't fit in the POSIX range, we will
1273          * pretend it was successful.
1274          */
1275
1276         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1277                 return True;
1278         }
1279
1280         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1281                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1282                 return False;
1283         }
1284
1285         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1286                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1287                 talloc_destroy(ul_ctx);
1288                 return False;
1289         }
1290
1291         /*
1292          * Create the initial list entry containing the
1293          * lock we want to remove.
1294          */
1295
1296         ZERO_STRUCTP(ul);
1297         ul->start = offset;
1298         ul->size = count;
1299
1300         DLIST_ADD(ulist, ul);
1301
1302         /*
1303          * Walk the given array creating a linked list
1304          * of unlock requests.
1305          */
1306
1307         ulist = posix_lock_list(ul_ctx,
1308                                 ulist,
1309                                 lock_ctx, /* Lock context ulist belongs to. */
1310                                 fsp,
1311                                 plocks,
1312                                 num_locks);
1313
1314         /*
1315          * Release the POSIX locks on the list of ranges returned.
1316          */
1317
1318         for(; ulist; ulist = ulist->next) {
1319                 offset = ulist->start;
1320                 count = ulist->size;
1321
1322                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1323                         (double)offset, (double)count ));
1324
1325                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1326                         ret = False;
1327                 }
1328         }
1329
1330         talloc_destroy(ul_ctx);
1331         return ret;
1332 }