s3-build: use dbwrap.h only where needed.
[samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19    Revision History:
20
21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
22 */
23
24 #include "includes.h"
25 #include "dbwrap.h"
26
27 #undef DBGC_CLASS
28 #define DBGC_CLASS DBGC_LOCKING
29
30 /*
31  * The pending close database handle.
32  */
33
34 static struct db_context *posix_pending_close_db;
35
36 /****************************************************************************
37  First - the functions that deal with the underlying system locks - these
38  functions are used no matter if we're mapping CIFS Windows locks or CIFS
39  POSIX locks onto POSIX.
40 ****************************************************************************/
41
42 /****************************************************************************
43  Utility function to map a lock type correctly depending on the open
44  mode of a file.
45 ****************************************************************************/
46
47 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
48 {
49         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
50                 /*
51                  * Many UNIX's cannot get a write lock on a file opened read-only.
52                  * Win32 locking semantics allow this.
53                  * Do the best we can and attempt a read-only lock.
54                  */
55                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
56                 return F_RDLCK;
57         }
58
59         /*
60          * This return should be the most normal, as we attempt
61          * to always open files read/write.
62          */
63
64         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
65 }
66
67 /****************************************************************************
68  Debugging aid :-).
69 ****************************************************************************/
70
/*
 * Return a printable name for an fcntl() lock type: "READ" for F_RDLCK,
 * "WRITE" for anything else. Used only in debug messages.
 */
static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}
	return "WRITE";
}
75
76 /****************************************************************************
77  Check to see if the given unsigned lock range is within the possible POSIX
78  range. Modifies the given args to be in range if possible, just returns
79  False if not.
80 ****************************************************************************/
81
82 static bool posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
83                                 uint64_t u_offset, uint64_t u_count)
84 {
85         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
86         SMB_OFF_T count = (SMB_OFF_T)u_count;
87
88         /*
89          * For the type of system we are, attempt to
90          * find the maximum positive lock offset as an SMB_OFF_T.
91          */
92
93 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
94
95         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
96
97 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
98
99         /*
100          * In this case SMB_OFF_T is 64 bits,
101          * and the underlying system can handle 64 bit signed locks.
102          */
103
104         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
105         SMB_OFF_T mask = (mask2<<1);
106         SMB_OFF_T max_positive_lock_offset = ~mask;
107
108 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
109
110         /*
111          * In this case either SMB_OFF_T is 32 bits,
112          * or the underlying system cannot handle 64 bit signed locks.
113          * All offsets & counts must be 2^31 or less.
114          */
115
116         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
117
118 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
119
120         /*
121          * POSIX locks of length zero mean lock to end-of-file.
122          * Win32 locks of length zero are point probes. Ignore
123          * any Win32 locks of length zero. JRA.
124          */
125
126         if (count == (SMB_OFF_T)0) {
127                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
128                 return False;
129         }
130
131         /*
132          * If the given offset was > max_positive_lock_offset then we cannot map this at all
133          * ignore this lock.
134          */
135
136         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
137                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
138                                 (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
139                 return False;
140         }
141
142         /*
143          * We must truncate the count to less than max_positive_lock_offset.
144          */
145
146         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
147                 count = max_positive_lock_offset;
148         }
149
150         /*
151          * Truncate count to end at max lock offset.
152          */
153
154         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
155                 count = max_positive_lock_offset - offset;
156         }
157
158         /*
159          * If we ate all the count, ignore this lock.
160          */
161
162         if (count == 0) {
163                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
164                                 (double)u_offset, (double)u_count ));
165                 return False;
166         }
167
168         /*
169          * The mapping was successful.
170          */
171
172         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
173                         (double)offset, (double)count ));
174
175         *offset_out = offset;
176         *count_out = count;
177         
178         return True;
179 }
180
181 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
182                        struct files_struct *fsp, int op, SMB_OFF_T offset,
183                        SMB_OFF_T count, int type)
184 {
185         VFS_FIND(lock);
186         return handle->fns->lock(handle, fsp, op, offset, count, type);
187 }
188
189 /****************************************************************************
190  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
191  broken NFS implementations.
192 ****************************************************************************/
193
194 static bool posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
195 {
196         bool ret;
197
198         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
199
200         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
201
202         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
203
204                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
205                                         (double)offset,(double)count));
206                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
207                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
208
209                 /*
210                  * If the offset is > 0x7FFFFFFF then this will cause problems on
211                  * 32 bit NFS mounted filesystems. Just ignore it.
212                  */
213
214                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
215                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
216                         return True;
217                 }
218
219                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
220                         /* 32 bit NFS file system, retry with smaller offset */
221                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
222                         errno = 0;
223                         count &= 0x7fffffff;
224                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
225                 }
226         }
227
228         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
229         return ret;
230 }
231
232 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
233                           struct files_struct *fsp, SMB_OFF_T *poffset,
234                           SMB_OFF_T *pcount, int *ptype, pid_t *ppid)
235 {
236         VFS_FIND(getlock);
237         return handle->fns->getlock(handle, fsp, poffset, pcount, ptype, ppid);
238 }
239
240 /****************************************************************************
241  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
242  broken NFS implementations.
243 ****************************************************************************/
244
245 static bool posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
246 {
247         pid_t pid;
248         bool ret;
249
250         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
251                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
252
253         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
254
255         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
256
257                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
258                                         (double)*poffset,(double)*pcount));
259                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
260                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
261
262                 /*
263                  * If the offset is > 0x7FFFFFFF then this will cause problems on
264                  * 32 bit NFS mounted filesystems. Just ignore it.
265                  */
266
267                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
268                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
269                         return True;
270                 }
271
272                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
273                         /* 32 bit NFS file system, retry with smaller offset */
274                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
275                         errno = 0;
276                         *pcount &= 0x7fffffff;
277                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
278                 }
279         }
280
281         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
282         return ret;
283 }
284
285 /****************************************************************************
286  POSIX function to see if a file region is locked. Returns True if the
287  region is locked, False otherwise.
288 ****************************************************************************/
289
290 bool is_posix_locked(files_struct *fsp,
291                         uint64_t *pu_offset,
292                         uint64_t *pu_count,
293                         enum brl_type *plock_type,
294                         enum brl_flavour lock_flav)
295 {
296         SMB_OFF_T offset;
297         SMB_OFF_T count;
298         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
299
300         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
301                   "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
302                   (double)*pu_count,  posix_lock_type_name(*plock_type)));
303
304         /*
305          * If the requested lock won't fit in the POSIX range, we will
306          * never set it, so presume it is not locked.
307          */
308
309         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
310                 return False;
311         }
312
313         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
314                 return False;
315         }
316
317         if (posix_lock_type == F_UNLCK) {
318                 return False;
319         }
320
321         if (lock_flav == POSIX_LOCK) {
322                 /* Only POSIX lock queries need to know the details. */
323                 *pu_offset = (uint64_t)offset;
324                 *pu_count = (uint64_t)count;
325                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
326         }
327         return True;
328 }
329
330 /****************************************************************************
331  Next - the functions that deal with in memory database storing representations
332  of either Windows CIFS locks or POSIX CIFS locks.
333 ****************************************************************************/
334
335 /* The key used in the in-memory POSIX databases. */
336
/*
 * Key of the lock reference count record for a file: the file's id
 * followed by a one byte tag. The extra byte makes this key differ in
 * size from the bare file_id key built by fd_array_key_fsp().
 */
337 struct lock_ref_count_key {
338         struct file_id id; /* copied from fsp->file_id */
339         char r; /* tag byte, set to 'r' by locking_ref_count_key_fsp() */
340 }; 
341
342 /*******************************************************************
343  Form a static locking key for a dev/inode pair for the lock ref count
344 ******************************************************************/
345
346 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
347                                           struct lock_ref_count_key *tmp)
348 {
349         ZERO_STRUCTP(tmp);
350         tmp->id = fsp->file_id;
351         tmp->r = 'r';
352         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
353 }
354
355 /*******************************************************************
356  Convenience function to get an fd_array key from an fsp.
357 ******************************************************************/
358
359 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
360 {
361         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
362 }
363
364 /*******************************************************************
365  Create the in-memory POSIX lock databases.
366 ********************************************************************/
367
368 bool posix_locking_init(bool read_only)
369 {
370         if (posix_pending_close_db != NULL) {
371                 return true;
372         }
373
374         posix_pending_close_db = db_open_rbt(NULL);
375
376         if (posix_pending_close_db == NULL) {
377                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
378                 return false;
379         }
380
381         return true;
382 }
383
384 /*******************************************************************
385  Delete the in-memory POSIX lock databases.
386 ********************************************************************/
387
388 bool posix_locking_end(void)
389 {
390         /*
391          * Shouldn't we close all fd's here?
392          */
393         TALLOC_FREE(posix_pending_close_db);
394         return true;
395 }
396
397 /****************************************************************************
398  Next - the functions that deal with storing fd's that have outstanding
399  POSIX locks when closed.
400 ****************************************************************************/
401
/****************************************************************************
 posix_pending_close_db holds two kinds of record for each dev/ino pair.
 The record keyed by the bare dev/ino pair is an array of ints: the fd's that
 were open on this dev/ino pair that should have been closed, but can't be as
 the lock ref count is non zero.
 The record keyed by the dev/ino pair followed by an 'r' byte is a single
 int: the reference count of the number of outstanding locks on all open fd's
 on this dev/ino pair.
****************************************************************************/
410
411 /****************************************************************************
412  Keep a reference count of the number of Windows locks open on this dev/ino
413  pair. Creates entry if it doesn't exist.
414 ****************************************************************************/
415
416 static void increment_windows_lock_ref_count(files_struct *fsp)
417 {
418         struct lock_ref_count_key tmp;
419         struct db_record *rec;
420         int lock_ref_count = 0;
421         NTSTATUS status;
422
423         rec = posix_pending_close_db->fetch_locked(
424                 posix_pending_close_db, talloc_tos(),
425                 locking_ref_count_key_fsp(fsp, &tmp));
426
427         SMB_ASSERT(rec != NULL);
428
429         if (rec->value.dptr != NULL) {
430                 SMB_ASSERT(rec->value.dsize == sizeof(lock_ref_count));
431                 memcpy(&lock_ref_count, rec->value.dptr,
432                        sizeof(lock_ref_count));
433         }
434
435         lock_ref_count++;
436
437         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
438                                                sizeof(lock_ref_count)), 0);
439
440         SMB_ASSERT(NT_STATUS_IS_OK(status));
441
442         TALLOC_FREE(rec);
443
444         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
445                   fsp_str_dbg(fsp), lock_ref_count));
446 }
447
448 /****************************************************************************
449  Bulk delete - subtract as many locks as we've just deleted.
450 ****************************************************************************/
451
452 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
453 {
454         struct lock_ref_count_key tmp;
455         struct db_record *rec;
456         int lock_ref_count = 0;
457         NTSTATUS status;
458
459         rec = posix_pending_close_db->fetch_locked(
460                 posix_pending_close_db, talloc_tos(),
461                 locking_ref_count_key_fsp(fsp, &tmp));
462
463         SMB_ASSERT((rec != NULL)
464                    && (rec->value.dptr != NULL)
465                    && (rec->value.dsize == sizeof(lock_ref_count)));
466
467         memcpy(&lock_ref_count, rec->value.dptr, sizeof(lock_ref_count));
468
469         SMB_ASSERT(lock_ref_count > 0);
470
471         lock_ref_count -= dcount;
472
473         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
474                                                sizeof(lock_ref_count)), 0);
475
476         SMB_ASSERT(NT_STATUS_IS_OK(status));
477
478         TALLOC_FREE(rec);
479
480         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
481                   fsp_str_dbg(fsp), lock_ref_count));
482 }
483
484 static void decrement_windows_lock_ref_count(files_struct *fsp)
485 {
486         reduce_windows_lock_ref_count(fsp, 1);
487 }
488
489 /****************************************************************************
490  Fetch the lock ref count.
491 ****************************************************************************/
492
493 static int get_windows_lock_ref_count(files_struct *fsp)
494 {
495         struct lock_ref_count_key tmp;
496         TDB_DATA dbuf;
497         int res;
498         int lock_ref_count = 0;
499
500         res = posix_pending_close_db->fetch(
501                 posix_pending_close_db, talloc_tos(),
502                 locking_ref_count_key_fsp(fsp, &tmp), &dbuf);
503
504         SMB_ASSERT(res == 0);
505
506         if (dbuf.dsize != 0) {
507                 SMB_ASSERT(dbuf.dsize == sizeof(lock_ref_count));
508                 memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count));
509                 TALLOC_FREE(dbuf.dptr);
510         }
511
512         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
513                   fsp_str_dbg(fsp), lock_ref_count));
514
515         return lock_ref_count;
516 }
517
518 /****************************************************************************
519  Delete a lock_ref_count entry.
520 ****************************************************************************/
521
522 static void delete_windows_lock_ref_count(files_struct *fsp)
523 {
524         struct lock_ref_count_key tmp;
525         struct db_record *rec;
526
527         rec = posix_pending_close_db->fetch_locked(
528                 posix_pending_close_db, talloc_tos(),
529                 locking_ref_count_key_fsp(fsp, &tmp));
530
531         SMB_ASSERT(rec != NULL);
532
533         /* Not a bug if it doesn't exist - no locks were ever granted. */
534
535         rec->delete_rec(rec);
536         TALLOC_FREE(rec);
537
538         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
539                   fsp_str_dbg(fsp)));
540 }
541
542 /****************************************************************************
543  Add an fd to the pending close tdb.
544 ****************************************************************************/
545
546 static void add_fd_to_close_entry(files_struct *fsp)
547 {
548         struct db_record *rec;
549         uint8_t *new_data;
550         NTSTATUS status;
551
552         rec = posix_pending_close_db->fetch_locked(
553                 posix_pending_close_db, talloc_tos(),
554                 fd_array_key_fsp(fsp));
555
556         SMB_ASSERT(rec != NULL);
557
558         new_data = TALLOC_ARRAY(
559                 rec, uint8_t, rec->value.dsize + sizeof(fsp->fh->fd));
560
561         SMB_ASSERT(new_data != NULL);
562
563         memcpy(new_data, rec->value.dptr, rec->value.dsize);
564         memcpy(new_data + rec->value.dsize,
565                &fsp->fh->fd, sizeof(fsp->fh->fd));
566
567         status = rec->store(
568                 rec, make_tdb_data(new_data,
569                                    rec->value.dsize + sizeof(fsp->fh->fd)), 0);
570
571         SMB_ASSERT(NT_STATUS_IS_OK(status));
572
573         TALLOC_FREE(rec);
574
575         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
576                   fsp->fh->fd, fsp_str_dbg(fsp)));
577 }
578
579 /****************************************************************************
580  Remove all fd entries for a specific dev/inode pair from the tdb.
581 ****************************************************************************/
582
583 static void delete_close_entries(files_struct *fsp)
584 {
585         struct db_record *rec;
586
587         rec = posix_pending_close_db->fetch_locked(
588                 posix_pending_close_db, talloc_tos(),
589                 fd_array_key_fsp(fsp));
590
591         SMB_ASSERT(rec != NULL);
592         rec->delete_rec(rec);
593         TALLOC_FREE(rec);
594 }
595
596 /****************************************************************************
597  Get the array of POSIX pending close records for an open fsp. Returns number
598  of entries.
599 ****************************************************************************/
600
601 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
602                                               files_struct *fsp, int **entries)
603 {
604         TDB_DATA dbuf;
605         int res;
606
607         res = posix_pending_close_db->fetch(
608                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
609                 &dbuf);
610
611         SMB_ASSERT(res == 0);
612
613         if (dbuf.dsize == 0) {
614                 *entries = NULL;
615                 return 0;
616         }
617
618         *entries = (int *)dbuf.dptr;
619         return (size_t)(dbuf.dsize / sizeof(int));
620 }
621
622 /****************************************************************************
623  Deal with pending closes needed by POSIX locking support.
624  Note that posix_locking_close_file() is expected to have been called
625  to delete all locks on this fsp before this function is called.
626 ****************************************************************************/
627
628 int fd_close_posix(struct files_struct *fsp)
629 {
630         int saved_errno = 0;
631         int ret;
632         int *fd_array = NULL;
633         size_t count, i;
634
635         if (!lp_locking(fsp->conn->params) ||
636             !lp_posix_locking(fsp->conn->params))
637         {
638                 /*
639                  * No locking or POSIX to worry about or we want POSIX semantics
640                  * which will lose all locks on all fd's open on this dev/inode,
641                  * just close.
642                  */
643                 return close(fsp->fh->fd);
644         }
645
646         if (get_windows_lock_ref_count(fsp)) {
647
648                 /*
649                  * There are outstanding locks on this dev/inode pair on
650                  * other fds. Add our fd to the pending close tdb and set
651                  * fsp->fh->fd to -1.
652                  */
653
654                 add_fd_to_close_entry(fsp);
655                 return 0;
656         }
657
658         /*
659          * No outstanding locks. Get the pending close fd's
660          * from the tdb and close them all.
661          */
662
663         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
664
665         if (count) {
666                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
667                           (unsigned int)count));
668
669                 for(i = 0; i < count; i++) {
670                         if (close(fd_array[i]) == -1) {
671                                 saved_errno = errno;
672                         }
673                 }
674
675                 /*
676                  * Delete all fd's stored in the tdb
677                  * for this dev/inode pair.
678                  */
679
680                 delete_close_entries(fsp);
681         }
682
683         TALLOC_FREE(fd_array);
684
685         /* Don't need a lock ref count on this dev/ino anymore. */
686         delete_windows_lock_ref_count(fsp);
687
688         /*
689          * Finally close the fd associated with this fsp.
690          */
691
692         ret = close(fsp->fh->fd);
693
694         if (ret == 0 && saved_errno != 0) {
695                 errno = saved_errno;
696                 ret = -1;
697         }
698
699         return ret;
700 }
701
702 /****************************************************************************
703  Next - the functions that deal with the mapping CIFS Windows locks onto
704  the underlying system POSIX locks.
705 ****************************************************************************/
706
707 /*
708  * Structure used when splitting a lock range
709  * into a POSIX lock range. Doubly linked list.
710  */
711
712 struct lock_list {
713         struct lock_list *next; /* following entry in the list */
714         struct lock_list *prev; /* preceding entry in the list */
715         SMB_OFF_T start; /* first offset covered by this range */
716         SMB_OFF_T size; /* length of this range */
717 };
718
719 /****************************************************************************
720  Create a list of lock ranges that don't overlap a given range. Used in calculating
721  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
722  understand it :-).
723 ****************************************************************************/
724
725 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
726                                                 struct lock_list *lhead,
727                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
728                                                 files_struct *fsp,
729                                                 const struct lock_struct *plocks,
730                                                 int num_locks)
731 {
732         int i;
733
734         /*
735          * Check the current lock list on this dev/inode pair.
736          * Quit if the list is deleted.
737          */
738
739         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
740                 (double)lhead->start, (double)lhead->size ));
741
742         for (i=0; i<num_locks && lhead; i++) {
743                 const struct lock_struct *lock = &plocks[i];
744                 struct lock_list *l_curr;
745
746                 /* Ignore all but read/write locks. */
747                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
748                         continue;
749                 }
750
751                 /* Ignore locks not owned by this process. */
752                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
753                         continue;
754                 }
755
756                 /*
757                  * Walk the lock list, checking for overlaps. Note that
758                  * the lock list can expand within this loop if the current
759                  * range being examined needs to be split.
760                  */
761
762                 for (l_curr = lhead; l_curr;) {
763
764                         DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum,
765                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
766
767                         if ( (l_curr->start >= (lock->start + lock->size)) ||
768                                  (lock->start >= (l_curr->start + l_curr->size))) {
769
770                                 /* No overlap with existing lock - leave this range alone. */
771 /*********************************************
772                                              +---------+
773                                              | l_curr  |
774                                              +---------+
775                                 +-------+
776                                 | lock  |
777                                 +-------+
778 OR....
779              +---------+
780              |  l_curr |
781              +---------+
782 **********************************************/
783
784                                 DEBUG(10,(" no overlap case.\n" ));
785
786                                 l_curr = l_curr->next;
787
788                         } else if ( (l_curr->start >= lock->start) &&
789                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
790
791                                 /*
792                                  * This range is completely overlapped by this existing lock range
793                                  * and thus should have no effect. Delete it from the list.
794                                  */
795 /*********************************************
796                 +---------+
797                 |  l_curr |
798                 +---------+
799         +---------------------------+
800         |       lock                |
801         +---------------------------+
802 **********************************************/
803                                 /* Save the next pointer */
804                                 struct lock_list *ul_next = l_curr->next;
805
806                                 DEBUG(10,(" delete case.\n" ));
807
808                                 DLIST_REMOVE(lhead, l_curr);
809                                 if(lhead == NULL) {
810                                         break; /* No more list... */
811                                 }
812
813                                 l_curr = ul_next;
814                                 
815                         } else if ( (l_curr->start >= lock->start) &&
816                                                 (l_curr->start < lock->start + lock->size) &&
817                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
818
819                                 /*
820                                  * This range overlaps the existing lock range at the high end.
821                                  * Truncate by moving start to existing range end and reducing size.
822                                  */
823 /*********************************************
824                 +---------------+
825                 |  l_curr       |
826                 +---------------+
827         +---------------+
828         |    lock       |
829         +---------------+
830 BECOMES....
831                         +-------+
832                         | l_curr|
833                         +-------+
834 **********************************************/
835
836                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
837                                 l_curr->start = lock->start + lock->size;
838
839                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
840                                                                 (double)l_curr->start, (double)l_curr->size ));
841
842                                 l_curr = l_curr->next;
843
844                         } else if ( (l_curr->start < lock->start) &&
845                                                 (l_curr->start + l_curr->size > lock->start) &&
846                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
847
848                                 /*
849                                  * This range overlaps the existing lock range at the low end.
850                                  * Truncate by reducing size.
851                                  */
852 /*********************************************
853    +---------------+
854    |  l_curr       |
855    +---------------+
856            +---------------+
857            |    lock       |
858            +---------------+
859 BECOMES....
860    +-------+
861    | l_curr|
862    +-------+
863 **********************************************/
864
865                                 l_curr->size = lock->start - l_curr->start;
866
867                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
868                                                                 (double)l_curr->start, (double)l_curr->size ));
869
870                                 l_curr = l_curr->next;
871                 
872                         } else if ( (l_curr->start < lock->start) &&
873                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
874                                 /*
875                                  * Worst case scenario. Range completely overlaps an existing
876                                  * lock range. Split the request into two, push the new (upper) request
877                                  * into the dlink list, and continue with the entry after l_new (as we
878                                  * know that l_new will not overlap with this lock).
879                                  */
880 /*********************************************
881         +---------------------------+
882         |        l_curr             |
883         +---------------------------+
884                 +---------+
885                 | lock    |
886                 +---------+
887 BECOMES.....
888         +-------+         +---------+
889         | l_curr|         | l_new   |
890         +-------+         +---------+
891 **********************************************/
892                                 struct lock_list *l_new = TALLOC_P(ctx, struct lock_list);
893
894                                 if(l_new == NULL) {
895                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
896                                         return NULL; /* The talloc_destroy takes care of cleanup. */
897                                 }
898
899                                 ZERO_STRUCTP(l_new);
900                                 l_new->start = lock->start + lock->size;
901                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
902
903                                 /* Truncate the l_curr. */
904                                 l_curr->size = lock->start - l_curr->start;
905
906                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
907 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
908                                                                 (double)l_new->start, (double)l_new->size ));
909
910                                 /*
911                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
912                                  */
913                                 DLIST_ADD_AFTER(lhead, l_new, l_curr);
914
915                                 /* And move after the link we added. */
916                                 l_curr = l_new->next;
917
918                         } else {
919
920                                 /*
921                                  * This logic case should never happen. Ensure this is the
922                                  * case by forcing an abort.... Remove in production.
923                                  */
924                                 char *msg = NULL;
925
926                                 if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
927 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
928                                         smb_panic(msg);
929                                 } else {
930                                         smb_panic("posix_lock_list");
931                                 }
932                         }
933                 } /* end for ( l_curr = lhead; l_curr;) */
934         } /* end for (i=0; i<num_locks && ul_head; i++) */
935
936         return lhead;
937 }
938
939 /****************************************************************************
940  POSIX function to acquire a lock. Returns True if the
941  lock could be granted, False if not.
942 ****************************************************************************/
943
944 bool set_posix_lock_windows_flavour(files_struct *fsp,
945                         uint64_t u_offset,
946                         uint64_t u_count,
947                         enum brl_type lock_type,
948                         const struct lock_context *lock_ctx,
949                         const struct lock_struct *plocks,
950                         int num_locks,
951                         int *errno_ret)
952 {
953         SMB_OFF_T offset;
954         SMB_OFF_T count;
955         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
956         bool ret = True;
957         size_t lock_count;
958         TALLOC_CTX *l_ctx = NULL;
959         struct lock_list *llist = NULL;
960         struct lock_list *ll = NULL;
961
962         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
963                  "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
964                  (double)u_offset, (double)u_count,
965                  posix_lock_type_name(lock_type)));
966
967         /*
968          * If the requested lock won't fit in the POSIX range, we will
969          * pretend it was successful.
970          */
971
972         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
973                 increment_windows_lock_ref_count(fsp);
974                 return True;
975         }
976
977         /*
978          * Windows is very strange. It allows read locks to be overlayed
979          * (even over a write lock), but leaves the write lock in force until the first
980          * unlock. It also reference counts the locks. This means the following sequence :
981          *
982          * process1                                      process2
983          * ------------------------------------------------------------------------
984          * WRITE LOCK : start = 2, len = 10
985          *                                            READ LOCK: start =0, len = 10 - FAIL
986          * READ LOCK : start = 0, len = 14 
987          *                                            READ LOCK: start =0, len = 10 - FAIL
988          * UNLOCK : start = 2, len = 10
989          *                                            READ LOCK: start =0, len = 10 - OK
990          *
991          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
992          * would leave a single read lock over the 0-14 region.
993          */
994         
995         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
996                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
997                 return False;
998         }
999
1000         if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
1001                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1002                 talloc_destroy(l_ctx);
1003                 return False;
1004         }
1005
1006         /*
1007          * Create the initial list entry containing the
1008          * lock we want to add.
1009          */
1010
1011         ZERO_STRUCTP(ll);
1012         ll->start = offset;
1013         ll->size = count;
1014
1015         DLIST_ADD(llist, ll);
1016
1017         /*
1018          * The following call calculates if there are any
1019          * overlapping locks held by this process on
1020          * fd's open on the same file and splits this list
1021          * into a list of lock ranges that do not overlap with existing
1022          * POSIX locks.
1023          */
1024
1025         llist = posix_lock_list(l_ctx,
1026                                 llist,
1027                                 lock_ctx, /* Lock context llist belongs to. */
1028                                 fsp,
1029                                 plocks,
1030                                 num_locks);
1031
1032         /*
1033          * Add the POSIX locks on the list of ranges returned.
1034          * As the lock is supposed to be added atomically, we need to
1035          * back out all the locks if any one of these calls fail.
1036          */
1037
1038         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1039                 offset = ll->start;
1040                 count = ll->size;
1041
1042                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1043                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1044
1045                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1046                         *errno_ret = errno;
1047                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1048                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1049                         ret = False;
1050                         break;
1051                 }
1052         }
1053
1054         if (!ret) {
1055
1056                 /*
1057                  * Back out all the POSIX locks we have on fail.
1058                  */
1059
1060                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1061                         offset = ll->start;
1062                         count = ll->size;
1063
1064                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1065                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1066
1067                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1068                 }
1069         } else {
1070                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1071                 increment_windows_lock_ref_count(fsp);
1072         }
1073
1074         talloc_destroy(l_ctx);
1075         return ret;
1076 }
1077
1078 /****************************************************************************
1079  POSIX function to release a lock. Returns True if the
1080  lock could be released, False if not.
1081 ****************************************************************************/
1082
1083 bool release_posix_lock_windows_flavour(files_struct *fsp,
1084                                 uint64_t u_offset,
1085                                 uint64_t u_count,
1086                                 enum brl_type deleted_lock_type,
1087                                 const struct lock_context *lock_ctx,
1088                                 const struct lock_struct *plocks,
1089                                 int num_locks)
1090 {
1091         SMB_OFF_T offset;
1092         SMB_OFF_T count;
1093         bool ret = True;
1094         TALLOC_CTX *ul_ctx = NULL;
1095         struct lock_list *ulist = NULL;
1096         struct lock_list *ul = NULL;
1097
1098         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
1099                  "count = %.0f\n", fsp_str_dbg(fsp),
1100                  (double)u_offset, (double)u_count));
1101
1102         /* Remember the number of Windows locks we have on this dev/ino pair. */
1103         decrement_windows_lock_ref_count(fsp);
1104
1105         /*
1106          * If the requested lock won't fit in the POSIX range, we will
1107          * pretend it was successful.
1108          */
1109
1110         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1111                 return True;
1112         }
1113
1114         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1115                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1116                 return False;
1117         }
1118
1119         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1120                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1121                 talloc_destroy(ul_ctx);
1122                 return False;
1123         }
1124
1125         /*
1126          * Create the initial list entry containing the
1127          * lock we want to remove.
1128          */
1129
1130         ZERO_STRUCTP(ul);
1131         ul->start = offset;
1132         ul->size = count;
1133
1134         DLIST_ADD(ulist, ul);
1135
1136         /*
1137          * The following call calculates if there are any
1138          * overlapping locks held by this process on
1139          * fd's open on the same file and creates a
1140          * list of unlock ranges that will allow
1141          * POSIX lock ranges to remain on the file whilst the
1142          * unlocks are performed.
1143          */
1144
1145         ulist = posix_lock_list(ul_ctx,
1146                                 ulist,
1147                                 lock_ctx, /* Lock context ulist belongs to. */
1148                                 fsp,
1149                                 plocks,
1150                                 num_locks);
1151
1152         /*
1153          * If there were any overlapped entries (list is > 1 or size or start have changed),
1154          * and the lock_type we just deleted from
1155          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1156          * the POSIX lock to a read lock. This allows any overlapping read locks
1157          * to be atomically maintained.
1158          */
1159
1160         if (deleted_lock_type == WRITE_LOCK &&
1161                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1162
1163                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1164                         (double)offset, (double)count ));
1165
1166                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1167                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1168                         talloc_destroy(ul_ctx);
1169                         return False;
1170                 }
1171         }
1172
1173         /*
1174          * Release the POSIX locks on the list of ranges returned.
1175          */
1176
1177         for(; ulist; ulist = ulist->next) {
1178                 offset = ulist->start;
1179                 count = ulist->size;
1180
1181                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1182                         (double)offset, (double)count ));
1183
1184                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1185                         ret = False;
1186                 }
1187         }
1188
1189         talloc_destroy(ul_ctx);
1190         return ret;
1191 }
1192
1193 /****************************************************************************
1194  Next - the functions that deal with mapping CIFS POSIX locks onto
1195  the underlying system POSIX locks.
1196 ****************************************************************************/
1197
1198 /****************************************************************************
1199  POSIX function to acquire a lock. Returns True if the
1200  lock could be granted, False if not.
1201  As POSIX locks don't stack or conflict (they just overwrite)
1202  we can map the requested lock directly onto a system one. We
1203  know it doesn't conflict with locks on other contexts as the
1204  upper layer would have refused it.
1205 ****************************************************************************/
1206
1207 bool set_posix_lock_posix_flavour(files_struct *fsp,
1208                         uint64_t u_offset,
1209                         uint64_t u_count,
1210                         enum brl_type lock_type,
1211                         int *errno_ret)
1212 {
1213         SMB_OFF_T offset;
1214         SMB_OFF_T count;
1215         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1216
1217         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
1218                  "= %.0f, type = %s\n", fsp_str_dbg(fsp),
1219                  (double)u_offset, (double)u_count,
1220                  posix_lock_type_name(lock_type)));
1221
1222         /*
1223          * If the requested lock won't fit in the POSIX range, we will
1224          * pretend it was successful.
1225          */
1226
1227         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1228                 return True;
1229         }
1230
1231         if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1232                 *errno_ret = errno;
1233                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1234                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1235                 return False;
1236         }
1237         return True;
1238 }
1239
1240 /****************************************************************************
1241  POSIX function to release a lock. Returns True if the
1242  lock could be released, False if not.
1243  We are given a complete lock state from the upper layer which is what the lock
1244  state should be after the unlock has already been done, so what
1245  we do is punch out holes in the unlock range where locks owned by this process
1246  have a different lock context.
1247 ****************************************************************************/
1248
1249 bool release_posix_lock_posix_flavour(files_struct *fsp,
1250                                 uint64_t u_offset,
1251                                 uint64_t u_count,
1252                                 const struct lock_context *lock_ctx,
1253                                 const struct lock_struct *plocks,
1254                                 int num_locks)
1255 {
1256         bool ret = True;
1257         SMB_OFF_T offset;
1258         SMB_OFF_T count;
1259         TALLOC_CTX *ul_ctx = NULL;
1260         struct lock_list *ulist = NULL;
1261         struct lock_list *ul = NULL;
1262
1263         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
1264                  "count = %.0f\n", fsp_str_dbg(fsp),
1265                  (double)u_offset, (double)u_count));
1266
1267         /*
1268          * If the requested lock won't fit in the POSIX range, we will
1269          * pretend it was successful.
1270          */
1271
1272         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1273                 return True;
1274         }
1275
1276         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1277                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1278                 return False;
1279         }
1280
1281         if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
1282                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1283                 talloc_destroy(ul_ctx);
1284                 return False;
1285         }
1286
1287         /*
1288          * Create the initial list entry containing the
1289          * lock we want to remove.
1290          */
1291
1292         ZERO_STRUCTP(ul);
1293         ul->start = offset;
1294         ul->size = count;
1295
1296         DLIST_ADD(ulist, ul);
1297
1298         /*
1299          * Walk the given array creating a linked list
1300          * of unlock requests.
1301          */
1302
1303         ulist = posix_lock_list(ul_ctx,
1304                                 ulist,
1305                                 lock_ctx, /* Lock context ulist belongs to. */
1306                                 fsp,
1307                                 plocks,
1308                                 num_locks);
1309
1310         /*
1311          * Release the POSIX locks on the list of ranges returned.
1312          */
1313
1314         for(; ulist; ulist = ulist->next) {
1315                 offset = ulist->start;
1316                 count = ulist->size;
1317
1318                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1319                         (double)offset, (double)count ));
1320
1321                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1322                         ret = False;
1323                 }
1324         }
1325
1326         talloc_destroy(ul_ctx);
1327         return ret;
1328 }