s3: Add a "lock_order" argument to db_open
[kai/samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19    Revision History:
20
21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
22 */
23
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "locking/proto.h"
27 #include "dbwrap/dbwrap.h"
28 #include "dbwrap/dbwrap_rbt.h"
29 #include "util_tdb.h"
30
31 #undef DBGC_CLASS
32 #define DBGC_CLASS DBGC_LOCKING
33
/*
 * Handle of the database used by the functions in this file to keep,
 * per open file id, the Windows lock reference count and the list of
 * fd's whose close has been deferred. Created by posix_locking_init()
 * and released by posix_locking_end().
 */

static struct db_context *posix_pending_close_db;
39
40 /****************************************************************************
41  First - the functions that deal with the underlying system locks - these
42  functions are used no matter if we're mapping CIFS Windows locks or CIFS
43  POSIX locks onto POSIX.
44 ****************************************************************************/
45
46 /****************************************************************************
47  Utility function to map a lock type correctly depending on the open
48  mode of a file.
49 ****************************************************************************/
50
51 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
52 {
53         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
54                 /*
55                  * Many UNIX's cannot get a write lock on a file opened read-only.
56                  * Win32 locking semantics allow this.
57                  * Do the best we can and attempt a read-only lock.
58                  */
59                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
60                 return F_RDLCK;
61         }
62
63         /*
64          * This return should be the most normal, as we attempt
65          * to always open files read/write.
66          */
67
68         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
69 }
70
/****************************************************************************
 Debugging helper: printable name for a lock type.
 Returns "READ" when lock_type equals F_RDLCK and "WRITE" for every other
 value.
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}
	return "WRITE";
}
79
80 /****************************************************************************
81  Check to see if the given unsigned lock range is within the possible POSIX
82  range. Modifies the given args to be in range if possible, just returns
83  False if not.
84 ****************************************************************************/
85
86 static bool posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
87                                 uint64_t u_offset, uint64_t u_count)
88 {
89         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
90         SMB_OFF_T count = (SMB_OFF_T)u_count;
91
92         /*
93          * For the type of system we are, attempt to
94          * find the maximum positive lock offset as an SMB_OFF_T.
95          */
96
97 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
98
99         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
100
101 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
102
103         /*
104          * In this case SMB_OFF_T is 64 bits,
105          * and the underlying system can handle 64 bit signed locks.
106          */
107
108         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
109         SMB_OFF_T mask = (mask2<<1);
110         SMB_OFF_T max_positive_lock_offset = ~mask;
111
112 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
113
114         /*
115          * In this case either SMB_OFF_T is 32 bits,
116          * or the underlying system cannot handle 64 bit signed locks.
117          * All offsets & counts must be 2^31 or less.
118          */
119
120         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
121
122 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
123
124         /*
125          * POSIX locks of length zero mean lock to end-of-file.
126          * Win32 locks of length zero are point probes. Ignore
127          * any Win32 locks of length zero. JRA.
128          */
129
130         if (count == (SMB_OFF_T)0) {
131                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
132                 return False;
133         }
134
135         /*
136          * If the given offset was > max_positive_lock_offset then we cannot map this at all
137          * ignore this lock.
138          */
139
140         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
141                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
142                                 (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
143                 return False;
144         }
145
146         /*
147          * We must truncate the count to less than max_positive_lock_offset.
148          */
149
150         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
151                 count = max_positive_lock_offset;
152         }
153
154         /*
155          * Truncate count to end at max lock offset.
156          */
157
158         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
159                 count = max_positive_lock_offset - offset;
160         }
161
162         /*
163          * If we ate all the count, ignore this lock.
164          */
165
166         if (count == 0) {
167                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
168                                 (double)u_offset, (double)u_count ));
169                 return False;
170         }
171
172         /*
173          * The mapping was successful.
174          */
175
176         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
177                         (double)offset, (double)count ));
178
179         *offset_out = offset;
180         *count_out = count;
181         
182         return True;
183 }
184
185 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
186                        struct files_struct *fsp, int op, SMB_OFF_T offset,
187                        SMB_OFF_T count, int type)
188 {
189         VFS_FIND(lock);
190         return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
191 }
192
193 /****************************************************************************
194  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
195  broken NFS implementations.
196 ****************************************************************************/
197
198 static bool posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
199 {
200         bool ret;
201
202         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
203
204         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
205
206         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
207
208                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
209                                         (double)offset,(double)count));
210                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
211                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
212
213                 /*
214                  * If the offset is > 0x7FFFFFFF then this will cause problems on
215                  * 32 bit NFS mounted filesystems. Just ignore it.
216                  */
217
218                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
219                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
220                         return True;
221                 }
222
223                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
224                         /* 32 bit NFS file system, retry with smaller offset */
225                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
226                         errno = 0;
227                         count &= 0x7fffffff;
228                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
229                 }
230         }
231
232         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
233         return ret;
234 }
235
236 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
237                           struct files_struct *fsp, SMB_OFF_T *poffset,
238                           SMB_OFF_T *pcount, int *ptype, pid_t *ppid)
239 {
240         VFS_FIND(getlock);
241         return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype, 
242                                        ppid);
243 }
244
245 /****************************************************************************
246  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
247  broken NFS implementations.
248 ****************************************************************************/
249
250 static bool posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
251 {
252         pid_t pid;
253         bool ret;
254
255         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
256                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
257
258         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
259
260         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
261
262                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
263                                         (double)*poffset,(double)*pcount));
264                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
265                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
266
267                 /*
268                  * If the offset is > 0x7FFFFFFF then this will cause problems on
269                  * 32 bit NFS mounted filesystems. Just ignore it.
270                  */
271
272                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
273                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
274                         return True;
275                 }
276
277                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
278                         /* 32 bit NFS file system, retry with smaller offset */
279                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
280                         errno = 0;
281                         *pcount &= 0x7fffffff;
282                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
283                 }
284         }
285
286         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
287         return ret;
288 }
289
290 /****************************************************************************
291  POSIX function to see if a file region is locked. Returns True if the
292  region is locked, False otherwise.
293 ****************************************************************************/
294
295 bool is_posix_locked(files_struct *fsp,
296                         uint64_t *pu_offset,
297                         uint64_t *pu_count,
298                         enum brl_type *plock_type,
299                         enum brl_flavour lock_flav)
300 {
301         SMB_OFF_T offset;
302         SMB_OFF_T count;
303         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
304
305         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
306                   "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
307                   (double)*pu_count,  posix_lock_type_name(*plock_type)));
308
309         /*
310          * If the requested lock won't fit in the POSIX range, we will
311          * never set it, so presume it is not locked.
312          */
313
314         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
315                 return False;
316         }
317
318         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
319                 return False;
320         }
321
322         if (posix_lock_type == F_UNLCK) {
323                 return False;
324         }
325
326         if (lock_flav == POSIX_LOCK) {
327                 /* Only POSIX lock queries need to know the details. */
328                 *pu_offset = (uint64_t)offset;
329                 *pu_count = (uint64_t)count;
330                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
331         }
332         return True;
333 }
334
335 /****************************************************************************
336  Next - the functions that deal with in memory database storing representations
337  of either Windows CIFS locks or POSIX CIFS locks.
338 ****************************************************************************/
339
340 /* The key used in the in-memory POSIX databases. */
341
342 struct lock_ref_count_key {
343         struct file_id id;
344         char r;
345 }; 
346
347 /*******************************************************************
348  Form a static locking key for a dev/inode pair for the lock ref count
349 ******************************************************************/
350
351 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
352                                           struct lock_ref_count_key *tmp)
353 {
354         ZERO_STRUCTP(tmp);
355         tmp->id = fsp->file_id;
356         tmp->r = 'r';
357         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
358 }
359
360 /*******************************************************************
361  Convenience function to get an fd_array key from an fsp.
362 ******************************************************************/
363
364 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
365 {
366         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
367 }
368
369 /*******************************************************************
370  Create the in-memory POSIX lock databases.
371 ********************************************************************/
372
373 bool posix_locking_init(bool read_only)
374 {
375         if (posix_pending_close_db != NULL) {
376                 return true;
377         }
378
379         posix_pending_close_db = db_open_rbt(NULL);
380
381         if (posix_pending_close_db == NULL) {
382                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
383                 return false;
384         }
385
386         return true;
387 }
388
389 /*******************************************************************
390  Delete the in-memory POSIX lock databases.
391 ********************************************************************/
392
393 bool posix_locking_end(void)
394 {
395         /*
396          * Shouldn't we close all fd's here?
397          */
398         TALLOC_FREE(posix_pending_close_db);
399         return true;
400 }
401
402 /****************************************************************************
403  Next - the functions that deal with storing fd's that have outstanding
404  POSIX locks when closed.
405 ****************************************************************************/
406
/****************************************************************************
 posix_pending_close_db holds two kinds of record for a dev/ino pair:
  - the lock reference count: a single int, keyed by the file id followed
    by the byte 'r' (struct lock_ref_count_key), counting the outstanding
    locks on all open fd's on this dev/ino pair;
  - the pending close list: an array of ints, keyed by the file id alone,
    holding the fd's that were open on this dev/ino pair that should have
    been closed, but can't be as the lock ref count is non zero.
****************************************************************************/
415
416 /****************************************************************************
417  Keep a reference count of the number of Windows locks open on this dev/ino
418  pair. Creates entry if it doesn't exist.
419 ****************************************************************************/
420
421 static void increment_windows_lock_ref_count(files_struct *fsp)
422 {
423         struct lock_ref_count_key tmp;
424         struct db_record *rec;
425         int lock_ref_count = 0;
426         NTSTATUS status;
427         TDB_DATA value;
428
429         rec = dbwrap_fetch_locked(
430                 posix_pending_close_db, talloc_tos(),
431                 locking_ref_count_key_fsp(fsp, &tmp));
432
433         SMB_ASSERT(rec != NULL);
434
435         value = dbwrap_record_get_value(rec);
436
437         if (value.dptr != NULL) {
438                 SMB_ASSERT(value.dsize == sizeof(lock_ref_count));
439                 memcpy(&lock_ref_count, value.dptr,
440                        sizeof(lock_ref_count));
441         }
442
443         lock_ref_count++;
444
445         status = dbwrap_record_store(rec,
446                                      make_tdb_data((uint8 *)&lock_ref_count,
447                                      sizeof(lock_ref_count)), 0);
448
449         SMB_ASSERT(NT_STATUS_IS_OK(status));
450
451         TALLOC_FREE(rec);
452
453         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
454                   fsp_str_dbg(fsp), lock_ref_count));
455 }
456
457 /****************************************************************************
458  Bulk delete - subtract as many locks as we've just deleted.
459 ****************************************************************************/
460
461 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
462 {
463         struct lock_ref_count_key tmp;
464         struct db_record *rec;
465         int lock_ref_count = 0;
466         NTSTATUS status;
467         TDB_DATA value;
468
469         rec = dbwrap_fetch_locked(
470                 posix_pending_close_db, talloc_tos(),
471                 locking_ref_count_key_fsp(fsp, &tmp));
472
473         value = dbwrap_record_get_value(rec);
474
475         SMB_ASSERT((rec != NULL)
476                    && (value.dptr != NULL)
477                    && (value.dsize == sizeof(lock_ref_count)));
478
479         memcpy(&lock_ref_count, value.dptr, sizeof(lock_ref_count));
480
481         SMB_ASSERT(lock_ref_count > 0);
482
483         lock_ref_count -= dcount;
484
485         status = dbwrap_record_store(rec,
486                                      make_tdb_data((uint8 *)&lock_ref_count,
487                                      sizeof(lock_ref_count)), 0);
488
489         SMB_ASSERT(NT_STATUS_IS_OK(status));
490
491         TALLOC_FREE(rec);
492
493         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
494                   fsp_str_dbg(fsp), lock_ref_count));
495 }
496
497 static void decrement_windows_lock_ref_count(files_struct *fsp)
498 {
499         reduce_windows_lock_ref_count(fsp, 1);
500 }
501
502 /****************************************************************************
503  Fetch the lock ref count.
504 ****************************************************************************/
505
506 static int get_windows_lock_ref_count(files_struct *fsp)
507 {
508         struct lock_ref_count_key tmp;
509         TDB_DATA dbuf;
510         NTSTATUS status;
511         int lock_ref_count = 0;
512
513         status = dbwrap_fetch(
514                 posix_pending_close_db, talloc_tos(),
515                 locking_ref_count_key_fsp(fsp, &tmp), &dbuf);
516
517         if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
518                 goto done;
519         }
520
521         if (!NT_STATUS_IS_OK(status)) {
522                 DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
523                           "lock ref count for file %s: %s\n",
524                           fsp_str_dbg(fsp), nt_errstr(status)));
525                 goto done;
526         }
527
528         if (dbuf.dsize != sizeof(lock_ref_count)) {
529                 DEBUG(0, ("get_windows_lock_ref_count: invalid entry "
530                           "in lock ref count record for file %s: "
531                           "(invalid data size %u)\n",
532                           fsp_str_dbg(fsp), (unsigned int)dbuf.dsize));
533                 goto done;
534         }
535
536         memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count));
537         TALLOC_FREE(dbuf.dptr);
538
539 done:
540         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
541                   fsp_str_dbg(fsp), lock_ref_count));
542
543         return lock_ref_count;
544 }
545
546 /****************************************************************************
547  Delete a lock_ref_count entry.
548 ****************************************************************************/
549
550 static void delete_windows_lock_ref_count(files_struct *fsp)
551 {
552         struct lock_ref_count_key tmp;
553         struct db_record *rec;
554
555         rec = dbwrap_fetch_locked(
556                 posix_pending_close_db, talloc_tos(),
557                 locking_ref_count_key_fsp(fsp, &tmp));
558
559         SMB_ASSERT(rec != NULL);
560
561         /* Not a bug if it doesn't exist - no locks were ever granted. */
562
563         dbwrap_record_delete(rec);
564         TALLOC_FREE(rec);
565
566         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
567                   fsp_str_dbg(fsp)));
568 }
569
570 /****************************************************************************
571  Add an fd to the pending close tdb.
572 ****************************************************************************/
573
574 static void add_fd_to_close_entry(files_struct *fsp)
575 {
576         struct db_record *rec;
577         uint8_t *new_data;
578         NTSTATUS status;
579         TDB_DATA value;
580
581         rec = dbwrap_fetch_locked(
582                 posix_pending_close_db, talloc_tos(),
583                 fd_array_key_fsp(fsp));
584
585         SMB_ASSERT(rec != NULL);
586
587         value = dbwrap_record_get_value(rec);
588
589         new_data = talloc_array(rec, uint8_t,
590                                 value.dsize + sizeof(fsp->fh->fd));
591
592         SMB_ASSERT(new_data != NULL);
593
594         memcpy(new_data, value.dptr, value.dsize);
595         memcpy(new_data + value.dsize,
596                &fsp->fh->fd, sizeof(fsp->fh->fd));
597
598         status = dbwrap_record_store(
599                 rec, make_tdb_data(new_data,
600                                    value.dsize + sizeof(fsp->fh->fd)), 0);
601
602         SMB_ASSERT(NT_STATUS_IS_OK(status));
603
604         TALLOC_FREE(rec);
605
606         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
607                   fsp->fh->fd, fsp_str_dbg(fsp)));
608 }
609
610 /****************************************************************************
611  Remove all fd entries for a specific dev/inode pair from the tdb.
612 ****************************************************************************/
613
614 static void delete_close_entries(files_struct *fsp)
615 {
616         struct db_record *rec;
617
618         rec = dbwrap_fetch_locked(
619                 posix_pending_close_db, talloc_tos(),
620                 fd_array_key_fsp(fsp));
621
622         SMB_ASSERT(rec != NULL);
623         dbwrap_record_delete(rec);
624         TALLOC_FREE(rec);
625 }
626
627 /****************************************************************************
628  Get the array of POSIX pending close records for an open fsp. Returns number
629  of entries.
630 ****************************************************************************/
631
632 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
633                                               files_struct *fsp, int **entries)
634 {
635         TDB_DATA dbuf;
636         NTSTATUS status;
637
638         status = dbwrap_fetch(
639                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
640                 &dbuf);
641
642         if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
643                 *entries = NULL;
644                 return 0;
645         }
646
647         SMB_ASSERT(NT_STATUS_IS_OK(status));
648
649         if (dbuf.dsize == 0) {
650                 *entries = NULL;
651                 return 0;
652         }
653
654         *entries = (int *)dbuf.dptr;
655         return (size_t)(dbuf.dsize / sizeof(int));
656 }
657
658 /****************************************************************************
659  Deal with pending closes needed by POSIX locking support.
660  Note that posix_locking_close_file() is expected to have been called
661  to delete all locks on this fsp before this function is called.
662 ****************************************************************************/
663
664 int fd_close_posix(struct files_struct *fsp)
665 {
666         int saved_errno = 0;
667         int ret;
668         int *fd_array = NULL;
669         size_t count, i;
670
671         if (!lp_locking(fsp->conn->params) ||
672             !lp_posix_locking(fsp->conn->params))
673         {
674                 /*
675                  * No locking or POSIX to worry about or we want POSIX semantics
676                  * which will lose all locks on all fd's open on this dev/inode,
677                  * just close.
678                  */
679                 return close(fsp->fh->fd);
680         }
681
682         if (get_windows_lock_ref_count(fsp)) {
683
684                 /*
685                  * There are outstanding locks on this dev/inode pair on
686                  * other fds. Add our fd to the pending close tdb and set
687                  * fsp->fh->fd to -1.
688                  */
689
690                 add_fd_to_close_entry(fsp);
691                 return 0;
692         }
693
694         /*
695          * No outstanding locks. Get the pending close fd's
696          * from the tdb and close them all.
697          */
698
699         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
700
701         if (count) {
702                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
703                           (unsigned int)count));
704
705                 for(i = 0; i < count; i++) {
706                         if (close(fd_array[i]) == -1) {
707                                 saved_errno = errno;
708                         }
709                 }
710
711                 /*
712                  * Delete all fd's stored in the tdb
713                  * for this dev/inode pair.
714                  */
715
716                 delete_close_entries(fsp);
717         }
718
719         TALLOC_FREE(fd_array);
720
721         /* Don't need a lock ref count on this dev/ino anymore. */
722         delete_windows_lock_ref_count(fsp);
723
724         /*
725          * Finally close the fd associated with this fsp.
726          */
727
728         ret = close(fsp->fh->fd);
729
730         if (ret == 0 && saved_errno != 0) {
731                 errno = saved_errno;
732                 ret = -1;
733         }
734
735         return ret;
736 }
737
738 /****************************************************************************
739  Next - the functions that deal with the mapping CIFS Windows locks onto
740  the underlying system POSIX locks.
741 ****************************************************************************/
742
743 /*
744  * Structure used when splitting a lock range
745  * into a POSIX lock range. Doubly linked list.
746  */
747
748 struct lock_list {
749         struct lock_list *next;
750         struct lock_list *prev;
751         SMB_OFF_T start;
752         SMB_OFF_T size;
753 };
754
755 /****************************************************************************
756  Create a list of lock ranges that don't overlap a given range. Used in calculating
757  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
758  understand it :-).
759 ****************************************************************************/
760
761 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
762                                                 struct lock_list *lhead,
763                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
764                                                 files_struct *fsp,
765                                                 const struct lock_struct *plocks,
766                                                 int num_locks)
767 {
768         int i;
769
770         /*
771          * Check the current lock list on this dev/inode pair.
772          * Quit if the list is deleted.
773          */
774
775         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
776                 (double)lhead->start, (double)lhead->size ));
777
778         for (i=0; i<num_locks && lhead; i++) {
779                 const struct lock_struct *lock = &plocks[i];
780                 struct lock_list *l_curr;
781
782                 /* Ignore all but read/write locks. */
783                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
784                         continue;
785                 }
786
787                 /* Ignore locks not owned by this process. */
788                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
789                         continue;
790                 }
791
792                 /*
793                  * Walk the lock list, checking for overlaps. Note that
794                  * the lock list can expand within this loop if the current
795                  * range being examined needs to be split.
796                  */
797
798                 for (l_curr = lhead; l_curr;) {
799
800                         DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum,
801                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
802
803                         if ( (l_curr->start >= (lock->start + lock->size)) ||
804                                  (lock->start >= (l_curr->start + l_curr->size))) {
805
806                                 /* No overlap with existing lock - leave this range alone. */
807 /*********************************************
808                                              +---------+
809                                              | l_curr  |
810                                              +---------+
811                                 +-------+
812                                 | lock  |
813                                 +-------+
814 OR....
815              +---------+
816              |  l_curr |
817              +---------+
818 **********************************************/
819
820                                 DEBUG(10,(" no overlap case.\n" ));
821
822                                 l_curr = l_curr->next;
823
824                         } else if ( (l_curr->start >= lock->start) &&
825                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
826
827                                 /*
828                                  * This range is completely overlapped by this existing lock range
829                                  * and thus should have no effect. Delete it from the list.
830                                  */
831 /*********************************************
832                 +---------+
833                 |  l_curr |
834                 +---------+
835         +---------------------------+
836         |       lock                |
837         +---------------------------+
838 **********************************************/
839                                 /* Save the next pointer */
840                                 struct lock_list *ul_next = l_curr->next;
841
842                                 DEBUG(10,(" delete case.\n" ));
843
844                                 DLIST_REMOVE(lhead, l_curr);
845                                 if(lhead == NULL) {
846                                         break; /* No more list... */
847                                 }
848
849                                 l_curr = ul_next;
850                                 
851                         } else if ( (l_curr->start >= lock->start) &&
852                                                 (l_curr->start < lock->start + lock->size) &&
853                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
854
855                                 /*
856                                  * This range overlaps the existing lock range at the high end.
857                                  * Truncate by moving start to existing range end and reducing size.
858                                  */
859 /*********************************************
860                 +---------------+
861                 |  l_curr       |
862                 +---------------+
863         +---------------+
864         |    lock       |
865         +---------------+
866 BECOMES....
867                         +-------+
868                         | l_curr|
869                         +-------+
870 **********************************************/
871
872                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
873                                 l_curr->start = lock->start + lock->size;
874
875                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
876                                                                 (double)l_curr->start, (double)l_curr->size ));
877
878                                 l_curr = l_curr->next;
879
880                         } else if ( (l_curr->start < lock->start) &&
881                                                 (l_curr->start + l_curr->size > lock->start) &&
882                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
883
884                                 /*
885                                  * This range overlaps the existing lock range at the low end.
886                                  * Truncate by reducing size.
887                                  */
888 /*********************************************
889    +---------------+
890    |  l_curr       |
891    +---------------+
892            +---------------+
893            |    lock       |
894            +---------------+
895 BECOMES....
896    +-------+
897    | l_curr|
898    +-------+
899 **********************************************/
900
901                                 l_curr->size = lock->start - l_curr->start;
902
903                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
904                                                                 (double)l_curr->start, (double)l_curr->size ));
905
906                                 l_curr = l_curr->next;
907                 
908                         } else if ( (l_curr->start < lock->start) &&
909                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
910                                 /*
911                                  * Worst case scenario. Range completely overlaps an existing
912                                  * lock range. Split the request into two, push the new (upper) request
913                                  * into the dlink list, and continue with the entry after l_new (as we
914                                  * know that l_new will not overlap with this lock).
915                                  */
916 /*********************************************
917         +---------------------------+
918         |        l_curr             |
919         +---------------------------+
920                 +---------+
921                 | lock    |
922                 +---------+
923 BECOMES.....
924         +-------+         +---------+
925         | l_curr|         | l_new   |
926         +-------+         +---------+
927 **********************************************/
928                                 struct lock_list *l_new = talloc(ctx, struct lock_list);
929
930                                 if(l_new == NULL) {
931                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
932                                         return NULL; /* The talloc_destroy takes care of cleanup. */
933                                 }
934
935                                 ZERO_STRUCTP(l_new);
936                                 l_new->start = lock->start + lock->size;
937                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
938
939                                 /* Truncate the l_curr. */
940                                 l_curr->size = lock->start - l_curr->start;
941
942                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
943 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
944                                                                 (double)l_new->start, (double)l_new->size ));
945
946                                 /*
947                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
948                                  */
949                                 DLIST_ADD_AFTER(lhead, l_new, l_curr);
950
951                                 /* And move after the link we added. */
952                                 l_curr = l_new->next;
953
954                         } else {
955
956                                 /*
957                                  * This logic case should never happen. Ensure this is the
958                                  * case by forcing an abort.... Remove in production.
959                                  */
960                                 char *msg = NULL;
961
962                                 if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
963 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
964                                         smb_panic(msg);
965                                 } else {
966                                         smb_panic("posix_lock_list");
967                                 }
968                         }
969                 } /* end for ( l_curr = lhead; l_curr;) */
970         } /* end for (i=0; i<num_locks && ul_head; i++) */
971
972         return lhead;
973 }
974
975 /****************************************************************************
976  POSIX function to acquire a lock. Returns True if the
977  lock could be granted, False if not.
978 ****************************************************************************/
979
980 bool set_posix_lock_windows_flavour(files_struct *fsp,
981                         uint64_t u_offset,
982                         uint64_t u_count,
983                         enum brl_type lock_type,
984                         const struct lock_context *lock_ctx,
985                         const struct lock_struct *plocks,
986                         int num_locks,
987                         int *errno_ret)
988 {
989         SMB_OFF_T offset;
990         SMB_OFF_T count;
991         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
992         bool ret = True;
993         size_t lock_count;
994         TALLOC_CTX *l_ctx = NULL;
995         struct lock_list *llist = NULL;
996         struct lock_list *ll = NULL;
997
998         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
999                  "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
1000                  (double)u_offset, (double)u_count,
1001                  posix_lock_type_name(lock_type)));
1002
1003         /*
1004          * If the requested lock won't fit in the POSIX range, we will
1005          * pretend it was successful.
1006          */
1007
1008         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1009                 increment_windows_lock_ref_count(fsp);
1010                 return True;
1011         }
1012
1013         /*
1014          * Windows is very strange. It allows read locks to be overlayed
1015          * (even over a write lock), but leaves the write lock in force until the first
1016          * unlock. It also reference counts the locks. This means the following sequence :
1017          *
1018          * process1                                      process2
1019          * ------------------------------------------------------------------------
1020          * WRITE LOCK : start = 2, len = 10
1021          *                                            READ LOCK: start =0, len = 10 - FAIL
1022          * READ LOCK : start = 0, len = 14 
1023          *                                            READ LOCK: start =0, len = 10 - FAIL
1024          * UNLOCK : start = 2, len = 10
1025          *                                            READ LOCK: start =0, len = 10 - OK
1026          *
1027          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1028          * would leave a single read lock over the 0-14 region.
1029          */
1030         
1031         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1032                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
1033                 return False;
1034         }
1035
1036         if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
1037                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1038                 talloc_destroy(l_ctx);
1039                 return False;
1040         }
1041
1042         /*
1043          * Create the initial list entry containing the
1044          * lock we want to add.
1045          */
1046
1047         ZERO_STRUCTP(ll);
1048         ll->start = offset;
1049         ll->size = count;
1050
1051         DLIST_ADD(llist, ll);
1052
1053         /*
1054          * The following call calculates if there are any
1055          * overlapping locks held by this process on
1056          * fd's open on the same file and splits this list
1057          * into a list of lock ranges that do not overlap with existing
1058          * POSIX locks.
1059          */
1060
1061         llist = posix_lock_list(l_ctx,
1062                                 llist,
1063                                 lock_ctx, /* Lock context llist belongs to. */
1064                                 fsp,
1065                                 plocks,
1066                                 num_locks);
1067
1068         /*
1069          * Add the POSIX locks on the list of ranges returned.
1070          * As the lock is supposed to be added atomically, we need to
1071          * back out all the locks if any one of these calls fail.
1072          */
1073
1074         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1075                 offset = ll->start;
1076                 count = ll->size;
1077
1078                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1079                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1080
1081                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1082                         *errno_ret = errno;
1083                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1084                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1085                         ret = False;
1086                         break;
1087                 }
1088         }
1089
1090         if (!ret) {
1091
1092                 /*
1093                  * Back out all the POSIX locks we have on fail.
1094                  */
1095
1096                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1097                         offset = ll->start;
1098                         count = ll->size;
1099
1100                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1101                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1102
1103                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1104                 }
1105         } else {
1106                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1107                 increment_windows_lock_ref_count(fsp);
1108         }
1109
1110         talloc_destroy(l_ctx);
1111         return ret;
1112 }
1113
1114 /****************************************************************************
1115  POSIX function to release a lock. Returns True if the
1116  lock could be released, False if not.
1117 ****************************************************************************/
1118
1119 bool release_posix_lock_windows_flavour(files_struct *fsp,
1120                                 uint64_t u_offset,
1121                                 uint64_t u_count,
1122                                 enum brl_type deleted_lock_type,
1123                                 const struct lock_context *lock_ctx,
1124                                 const struct lock_struct *plocks,
1125                                 int num_locks)
1126 {
1127         SMB_OFF_T offset;
1128         SMB_OFF_T count;
1129         bool ret = True;
1130         TALLOC_CTX *ul_ctx = NULL;
1131         struct lock_list *ulist = NULL;
1132         struct lock_list *ul = NULL;
1133
1134         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
1135                  "count = %.0f\n", fsp_str_dbg(fsp),
1136                  (double)u_offset, (double)u_count));
1137
1138         /* Remember the number of Windows locks we have on this dev/ino pair. */
1139         decrement_windows_lock_ref_count(fsp);
1140
1141         /*
1142          * If the requested lock won't fit in the POSIX range, we will
1143          * pretend it was successful.
1144          */
1145
1146         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1147                 return True;
1148         }
1149
1150         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1151                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1152                 return False;
1153         }
1154
1155         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1156                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1157                 talloc_destroy(ul_ctx);
1158                 return False;
1159         }
1160
1161         /*
1162          * Create the initial list entry containing the
1163          * lock we want to remove.
1164          */
1165
1166         ZERO_STRUCTP(ul);
1167         ul->start = offset;
1168         ul->size = count;
1169
1170         DLIST_ADD(ulist, ul);
1171
1172         /*
1173          * The following call calculates if there are any
1174          * overlapping locks held by this process on
1175          * fd's open on the same file and creates a
1176          * list of unlock ranges that will allow
1177          * POSIX lock ranges to remain on the file whilst the
1178          * unlocks are performed.
1179          */
1180
1181         ulist = posix_lock_list(ul_ctx,
1182                                 ulist,
1183                                 lock_ctx, /* Lock context ulist belongs to. */
1184                                 fsp,
1185                                 plocks,
1186                                 num_locks);
1187
1188         /*
1189          * If there were any overlapped entries (list is > 1 or size or start have changed),
1190          * and the lock_type we just deleted from
1191          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1192          * the POSIX lock to a read lock. This allows any overlapping read locks
1193          * to be atomically maintained.
1194          */
1195
1196         if (deleted_lock_type == WRITE_LOCK &&
1197                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1198
1199                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1200                         (double)offset, (double)count ));
1201
1202                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1203                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1204                         talloc_destroy(ul_ctx);
1205                         return False;
1206                 }
1207         }
1208
1209         /*
1210          * Release the POSIX locks on the list of ranges returned.
1211          */
1212
1213         for(; ulist; ulist = ulist->next) {
1214                 offset = ulist->start;
1215                 count = ulist->size;
1216
1217                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1218                         (double)offset, (double)count ));
1219
1220                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1221                         ret = False;
1222                 }
1223         }
1224
1225         talloc_destroy(ul_ctx);
1226         return ret;
1227 }
1228
1229 /****************************************************************************
1230  Next - the functions that deal with mapping CIFS POSIX locks onto
1231  the underlying system POSIX locks.
1232 ****************************************************************************/
1233
1234 /****************************************************************************
1235  POSIX function to acquire a lock. Returns True if the
1236  lock could be granted, False if not.
1237  As POSIX locks don't stack or conflict (they just overwrite)
1238  we can map the requested lock directly onto a system one. We
1239  know it doesn't conflict with locks on other contexts as the
1240  upper layer would have refused it.
1241 ****************************************************************************/
1242
1243 bool set_posix_lock_posix_flavour(files_struct *fsp,
1244                         uint64_t u_offset,
1245                         uint64_t u_count,
1246                         enum brl_type lock_type,
1247                         int *errno_ret)
1248 {
1249         SMB_OFF_T offset;
1250         SMB_OFF_T count;
1251         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1252
1253         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
1254                  "= %.0f, type = %s\n", fsp_str_dbg(fsp),
1255                  (double)u_offset, (double)u_count,
1256                  posix_lock_type_name(lock_type)));
1257
1258         /*
1259          * If the requested lock won't fit in the POSIX range, we will
1260          * pretend it was successful.
1261          */
1262
1263         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1264                 return True;
1265         }
1266
1267         if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1268                 *errno_ret = errno;
1269                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1270                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1271                 return False;
1272         }
1273         return True;
1274 }
1275
1276 /****************************************************************************
1277  POSIX function to release a lock. Returns True if the
1278  lock could be released, False if not.
1279  We are given a complete lock state from the upper layer which is what the lock
1280  state should be after the unlock has already been done, so what
1281  we do is punch out holes in the unlock range where locks owned by this process
1282  have a different lock context.
1283 ****************************************************************************/
1284
1285 bool release_posix_lock_posix_flavour(files_struct *fsp,
1286                                 uint64_t u_offset,
1287                                 uint64_t u_count,
1288                                 const struct lock_context *lock_ctx,
1289                                 const struct lock_struct *plocks,
1290                                 int num_locks)
1291 {
1292         bool ret = True;
1293         SMB_OFF_T offset;
1294         SMB_OFF_T count;
1295         TALLOC_CTX *ul_ctx = NULL;
1296         struct lock_list *ulist = NULL;
1297         struct lock_list *ul = NULL;
1298
1299         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
1300                  "count = %.0f\n", fsp_str_dbg(fsp),
1301                  (double)u_offset, (double)u_count));
1302
1303         /*
1304          * If the requested lock won't fit in the POSIX range, we will
1305          * pretend it was successful.
1306          */
1307
1308         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1309                 return True;
1310         }
1311
1312         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1313                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1314                 return False;
1315         }
1316
1317         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1318                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1319                 talloc_destroy(ul_ctx);
1320                 return False;
1321         }
1322
1323         /*
1324          * Create the initial list entry containing the
1325          * lock we want to remove.
1326          */
1327
1328         ZERO_STRUCTP(ul);
1329         ul->start = offset;
1330         ul->size = count;
1331
1332         DLIST_ADD(ulist, ul);
1333
1334         /*
1335          * Walk the given array creating a linked list
1336          * of unlock requests.
1337          */
1338
1339         ulist = posix_lock_list(ul_ctx,
1340                                 ulist,
1341                                 lock_ctx, /* Lock context ulist belongs to. */
1342                                 fsp,
1343                                 plocks,
1344                                 num_locks);
1345
1346         /*
1347          * Release the POSIX locks on the list of ranges returned.
1348          */
1349
1350         for(; ulist; ulist = ulist->next) {
1351                 offset = ulist->start;
1352                 count = ulist->size;
1353
1354                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1355                         (double)offset, (double)count ));
1356
1357                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1358                         ret = False;
1359                 }
1360         }
1361
1362         talloc_destroy(ul_ctx);
1363         return ret;
1364 }