s3:util: rename procid_equal() to serverid_equal()
[samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19    Revision History:
20
21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
22 */
23
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "locking/proto.h"
27 #include "dbwrap/dbwrap.h"
28 #include "dbwrap/dbwrap_rbt.h"
29 #include "util_tdb.h"
30
31 #undef DBGC_CLASS
32 #define DBGC_CLASS DBGC_LOCKING
33
34 /*
35  * The pending close database handle.
36  */
37
38 static struct db_context *posix_pending_close_db;
39
40 /****************************************************************************
41  First - the functions that deal with the underlying system locks - these
42  functions are used no matter if we're mapping CIFS Windows locks or CIFS
43  POSIX locks onto POSIX.
44 ****************************************************************************/
45
46 /****************************************************************************
47  Utility function to map a lock type correctly depending on the open
48  mode of a file.
49 ****************************************************************************/
50
51 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
52 {
53         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
54                 /*
55                  * Many UNIX's cannot get a write lock on a file opened read-only.
56                  * Win32 locking semantics allow this.
57                  * Do the best we can and attempt a read-only lock.
58                  */
59                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
60                 return F_RDLCK;
61         }
62
63         /*
64          * This return should be the most normal, as we attempt
65          * to always open files read/write.
66          */
67
68         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
69 }
70
/****************************************************************************
 Debugging aid: printable name for a lock type. F_RDLCK yields "READ",
 every other value yields "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
	if (lock_type == F_RDLCK) {
		return "READ";
	}

	return "WRITE";
}
79
80 /****************************************************************************
81  Check to see if the given unsigned lock range is within the possible POSIX
82  range. Modifies the given args to be in range if possible, just returns
83  False if not.
84 ****************************************************************************/
85
86 static bool posix_lock_in_range(off_t *offset_out, off_t *count_out,
87                                 uint64_t u_offset, uint64_t u_count)
88 {
89         off_t offset = (off_t)u_offset;
90         off_t count = (off_t)u_count;
91
92         /*
93          * For the type of system we are, attempt to
94          * find the maximum positive lock offset as an off_t.
95          */
96
97 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
98
99         off_t max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
100 #else
101         /*
102          * In this case off_t is 64 bits,
103          * and the underlying system can handle 64 bit signed locks.
104          */
105
106         off_t mask2 = ((off_t)0x4) << (SMB_OFF_T_BITS-4);
107         off_t mask = (mask2<<1);
108         off_t max_positive_lock_offset = ~mask;
109
110 #endif
111         /*
112          * POSIX locks of length zero mean lock to end-of-file.
113          * Win32 locks of length zero are point probes. Ignore
114          * any Win32 locks of length zero. JRA.
115          */
116
117         if (count == (off_t)0) {
118                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
119                 return False;
120         }
121
122         /*
123          * If the given offset was > max_positive_lock_offset then we cannot map this at all
124          * ignore this lock.
125          */
126
127         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
128                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
129                                 (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
130                 return False;
131         }
132
133         /*
134          * We must truncate the count to less than max_positive_lock_offset.
135          */
136
137         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
138                 count = max_positive_lock_offset;
139         }
140
141         /*
142          * Truncate count to end at max lock offset.
143          */
144
145         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
146                 count = max_positive_lock_offset - offset;
147         }
148
149         /*
150          * If we ate all the count, ignore this lock.
151          */
152
153         if (count == 0) {
154                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
155                                 (double)u_offset, (double)u_count ));
156                 return False;
157         }
158
159         /*
160          * The mapping was successful.
161          */
162
163         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
164                         (double)offset, (double)count ));
165
166         *offset_out = offset;
167         *count_out = count;
168         
169         return True;
170 }
171
172 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
173                        struct files_struct *fsp, int op, off_t offset,
174                        off_t count, int type)
175 {
176         VFS_FIND(lock);
177         return handle->fns->lock_fn(handle, fsp, op, offset, count, type);
178 }
179
180 /****************************************************************************
181  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
182  broken NFS implementations.
183 ****************************************************************************/
184
185 static bool posix_fcntl_lock(files_struct *fsp, int op, off_t offset, off_t count, int type)
186 {
187         bool ret;
188
189         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
190
191         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
192
193         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
194
195                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
196                                         (double)offset,(double)count));
197                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
198                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
199
200                 /*
201                  * If the offset is > 0x7FFFFFFF then this will cause problems on
202                  * 32 bit NFS mounted filesystems. Just ignore it.
203                  */
204
205                 if (offset & ~((off_t)0x7fffffff)) {
206                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
207                         return True;
208                 }
209
210                 if (count & ~((off_t)0x7fffffff)) {
211                         /* 32 bit NFS file system, retry with smaller offset */
212                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
213                         errno = 0;
214                         count &= 0x7fffffff;
215                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
216                 }
217         }
218
219         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
220         return ret;
221 }
222
223 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
224                           struct files_struct *fsp, off_t *poffset,
225                           off_t *pcount, int *ptype, pid_t *ppid)
226 {
227         VFS_FIND(getlock);
228         return handle->fns->getlock_fn(handle, fsp, poffset, pcount, ptype, 
229                                        ppid);
230 }
231
232 /****************************************************************************
233  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
234  broken NFS implementations.
235 ****************************************************************************/
236
237 static bool posix_fcntl_getlock(files_struct *fsp, off_t *poffset, off_t *pcount, int *ptype)
238 {
239         pid_t pid;
240         bool ret;
241
242         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
243                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
244
245         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
246
247         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
248
249                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
250                                         (double)*poffset,(double)*pcount));
251                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
252                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
253
254                 /*
255                  * If the offset is > 0x7FFFFFFF then this will cause problems on
256                  * 32 bit NFS mounted filesystems. Just ignore it.
257                  */
258
259                 if (*poffset & ~((off_t)0x7fffffff)) {
260                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
261                         return True;
262                 }
263
264                 if (*pcount & ~((off_t)0x7fffffff)) {
265                         /* 32 bit NFS file system, retry with smaller offset */
266                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
267                         errno = 0;
268                         *pcount &= 0x7fffffff;
269                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
270                 }
271         }
272
273         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
274         return ret;
275 }
276
277 /****************************************************************************
278  POSIX function to see if a file region is locked. Returns True if the
279  region is locked, False otherwise.
280 ****************************************************************************/
281
282 bool is_posix_locked(files_struct *fsp,
283                         uint64_t *pu_offset,
284                         uint64_t *pu_count,
285                         enum brl_type *plock_type,
286                         enum brl_flavour lock_flav)
287 {
288         off_t offset;
289         off_t count;
290         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
291
292         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
293                   "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
294                   (double)*pu_count,  posix_lock_type_name(*plock_type)));
295
296         /*
297          * If the requested lock won't fit in the POSIX range, we will
298          * never set it, so presume it is not locked.
299          */
300
301         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
302                 return False;
303         }
304
305         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
306                 return False;
307         }
308
309         if (posix_lock_type == F_UNLCK) {
310                 return False;
311         }
312
313         if (lock_flav == POSIX_LOCK) {
314                 /* Only POSIX lock queries need to know the details. */
315                 *pu_offset = (uint64_t)offset;
316                 *pu_count = (uint64_t)count;
317                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
318         }
319         return True;
320 }
321
322 /****************************************************************************
323  Next - the functions that deal with in memory database storing representations
324  of either Windows CIFS locks or POSIX CIFS locks.
325 ****************************************************************************/
326
327 /* The key used in the in-memory POSIX databases. */
328
329 struct lock_ref_count_key {
330         struct file_id id;
331         char r;
332 }; 
333
334 /*******************************************************************
335  Form a static locking key for a dev/inode pair for the lock ref count
336 ******************************************************************/
337
338 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
339                                           struct lock_ref_count_key *tmp)
340 {
341         ZERO_STRUCTP(tmp);
342         tmp->id = fsp->file_id;
343         tmp->r = 'r';
344         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
345 }
346
347 /*******************************************************************
348  Convenience function to get an fd_array key from an fsp.
349 ******************************************************************/
350
351 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
352 {
353         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
354 }
355
356 /*******************************************************************
357  Create the in-memory POSIX lock databases.
358 ********************************************************************/
359
360 bool posix_locking_init(bool read_only)
361 {
362         if (posix_pending_close_db != NULL) {
363                 return true;
364         }
365
366         posix_pending_close_db = db_open_rbt(NULL);
367
368         if (posix_pending_close_db == NULL) {
369                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
370                 return false;
371         }
372
373         return true;
374 }
375
376 /*******************************************************************
377  Delete the in-memory POSIX lock databases.
378 ********************************************************************/
379
380 bool posix_locking_end(void)
381 {
382         /*
383          * Shouldn't we close all fd's here?
384          */
385         TALLOC_FREE(posix_pending_close_db);
386         return true;
387 }
388
389 /****************************************************************************
390  Next - the functions that deal with storing fd's that have outstanding
391  POSIX locks when closed.
392 ****************************************************************************/
393
394 /****************************************************************************
395  The records in posix_pending_close_db are composed of an array of
396  ints keyed by dev/ino pair. Those ints are the fd's that were open on
397  this dev/ino pair that should have been closed, but can't as the lock
398  ref count is non zero.
399 ****************************************************************************/
400
401 /****************************************************************************
402  Keep a reference count of the number of Windows locks open on this dev/ino
403  pair. Creates entry if it doesn't exist.
404 ****************************************************************************/
405
406 static void increment_windows_lock_ref_count(files_struct *fsp)
407 {
408         struct lock_ref_count_key tmp;
409         struct db_record *rec;
410         int lock_ref_count = 0;
411         NTSTATUS status;
412         TDB_DATA value;
413
414         rec = dbwrap_fetch_locked(
415                 posix_pending_close_db, talloc_tos(),
416                 locking_ref_count_key_fsp(fsp, &tmp));
417
418         SMB_ASSERT(rec != NULL);
419
420         value = dbwrap_record_get_value(rec);
421
422         if (value.dptr != NULL) {
423                 SMB_ASSERT(value.dsize == sizeof(lock_ref_count));
424                 memcpy(&lock_ref_count, value.dptr,
425                        sizeof(lock_ref_count));
426         }
427
428         lock_ref_count++;
429
430         status = dbwrap_record_store(rec,
431                                      make_tdb_data((uint8 *)&lock_ref_count,
432                                      sizeof(lock_ref_count)), 0);
433
434         SMB_ASSERT(NT_STATUS_IS_OK(status));
435
436         TALLOC_FREE(rec);
437
438         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
439                   fsp_str_dbg(fsp), lock_ref_count));
440 }
441
442 /****************************************************************************
443  Bulk delete - subtract as many locks as we've just deleted.
444 ****************************************************************************/
445
446 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
447 {
448         struct lock_ref_count_key tmp;
449         struct db_record *rec;
450         int lock_ref_count = 0;
451         NTSTATUS status;
452         TDB_DATA value;
453
454         rec = dbwrap_fetch_locked(
455                 posix_pending_close_db, talloc_tos(),
456                 locking_ref_count_key_fsp(fsp, &tmp));
457
458         if (rec == NULL) {
459                 DEBUG(0, ("reduce_windows_lock_ref_count: rec not found\n"));
460                 return;
461         }
462
463         value = dbwrap_record_get_value(rec);
464
465         if ((value.dptr == NULL) ||  (value.dsize != sizeof(lock_ref_count))) {
466                 DEBUG(0, ("reduce_windows_lock_ref_count: wrong value\n"));
467                 TALLOC_FREE(rec);
468                 return;
469         }
470
471         memcpy(&lock_ref_count, value.dptr, sizeof(lock_ref_count));
472
473         SMB_ASSERT(lock_ref_count > 0);
474
475         lock_ref_count -= dcount;
476
477         status = dbwrap_record_store(rec,
478                                      make_tdb_data((uint8 *)&lock_ref_count,
479                                      sizeof(lock_ref_count)), 0);
480
481         SMB_ASSERT(NT_STATUS_IS_OK(status));
482
483         TALLOC_FREE(rec);
484
485         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
486                   fsp_str_dbg(fsp), lock_ref_count));
487 }
488
489 static void decrement_windows_lock_ref_count(files_struct *fsp)
490 {
491         reduce_windows_lock_ref_count(fsp, 1);
492 }
493
494 /****************************************************************************
495  Fetch the lock ref count.
496 ****************************************************************************/
497
498 static int get_windows_lock_ref_count(files_struct *fsp)
499 {
500         struct lock_ref_count_key tmp;
501         TDB_DATA dbuf;
502         NTSTATUS status;
503         int lock_ref_count = 0;
504
505         status = dbwrap_fetch(
506                 posix_pending_close_db, talloc_tos(),
507                 locking_ref_count_key_fsp(fsp, &tmp), &dbuf);
508
509         if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
510                 goto done;
511         }
512
513         if (!NT_STATUS_IS_OK(status)) {
514                 DEBUG(0, ("get_windows_lock_ref_count: Error fetching "
515                           "lock ref count for file %s: %s\n",
516                           fsp_str_dbg(fsp), nt_errstr(status)));
517                 goto done;
518         }
519
520         if (dbuf.dsize != sizeof(lock_ref_count)) {
521                 DEBUG(0, ("get_windows_lock_ref_count: invalid entry "
522                           "in lock ref count record for file %s: "
523                           "(invalid data size %u)\n",
524                           fsp_str_dbg(fsp), (unsigned int)dbuf.dsize));
525                 goto done;
526         }
527
528         memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count));
529         TALLOC_FREE(dbuf.dptr);
530
531 done:
532         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
533                   fsp_str_dbg(fsp), lock_ref_count));
534
535         return lock_ref_count;
536 }
537
538 /****************************************************************************
539  Delete a lock_ref_count entry.
540 ****************************************************************************/
541
542 static void delete_windows_lock_ref_count(files_struct *fsp)
543 {
544         struct lock_ref_count_key tmp;
545         struct db_record *rec;
546
547         rec = dbwrap_fetch_locked(
548                 posix_pending_close_db, talloc_tos(),
549                 locking_ref_count_key_fsp(fsp, &tmp));
550
551         SMB_ASSERT(rec != NULL);
552
553         /* Not a bug if it doesn't exist - no locks were ever granted. */
554
555         dbwrap_record_delete(rec);
556         TALLOC_FREE(rec);
557
558         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
559                   fsp_str_dbg(fsp)));
560 }
561
562 /****************************************************************************
563  Add an fd to the pending close tdb.
564 ****************************************************************************/
565
566 static void add_fd_to_close_entry(files_struct *fsp)
567 {
568         struct db_record *rec;
569         int *fds;
570         size_t num_fds;
571         NTSTATUS status;
572         TDB_DATA value;
573
574         rec = dbwrap_fetch_locked(
575                 posix_pending_close_db, talloc_tos(),
576                 fd_array_key_fsp(fsp));
577
578         SMB_ASSERT(rec != NULL);
579
580         value = dbwrap_record_get_value(rec);
581         SMB_ASSERT((value.dsize % sizeof(int)) == 0);
582
583         num_fds = value.dsize / sizeof(int);
584         fds = talloc_array(rec, int, num_fds+1);
585
586         SMB_ASSERT(fds != NULL);
587
588         memcpy(fds, value.dptr, value.dsize);
589         fds[num_fds] = fsp->fh->fd;
590
591         status = dbwrap_record_store(
592                 rec, make_tdb_data((uint8_t *)fds, talloc_get_size(fds)), 0);
593
594         SMB_ASSERT(NT_STATUS_IS_OK(status));
595
596         TALLOC_FREE(rec);
597
598         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
599                   fsp->fh->fd, fsp_str_dbg(fsp)));
600 }
601
602 /****************************************************************************
603  Remove all fd entries for a specific dev/inode pair from the tdb.
604 ****************************************************************************/
605
606 static void delete_close_entries(files_struct *fsp)
607 {
608         struct db_record *rec;
609
610         rec = dbwrap_fetch_locked(
611                 posix_pending_close_db, talloc_tos(),
612                 fd_array_key_fsp(fsp));
613
614         SMB_ASSERT(rec != NULL);
615         dbwrap_record_delete(rec);
616         TALLOC_FREE(rec);
617 }
618
619 /****************************************************************************
620  Get the array of POSIX pending close records for an open fsp. Returns number
621  of entries.
622 ****************************************************************************/
623
624 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
625                                               files_struct *fsp, int **entries)
626 {
627         TDB_DATA dbuf;
628         NTSTATUS status;
629
630         status = dbwrap_fetch(
631                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
632                 &dbuf);
633
634         if (NT_STATUS_EQUAL(status, NT_STATUS_NOT_FOUND)) {
635                 *entries = NULL;
636                 return 0;
637         }
638
639         SMB_ASSERT(NT_STATUS_IS_OK(status));
640
641         if (dbuf.dsize == 0) {
642                 *entries = NULL;
643                 return 0;
644         }
645
646         *entries = (int *)dbuf.dptr;
647         return (size_t)(dbuf.dsize / sizeof(int));
648 }
649
650 /****************************************************************************
651  Deal with pending closes needed by POSIX locking support.
652  Note that posix_locking_close_file() is expected to have been called
653  to delete all locks on this fsp before this function is called.
654 ****************************************************************************/
655
656 int fd_close_posix(struct files_struct *fsp)
657 {
658         int saved_errno = 0;
659         int ret;
660         int *fd_array = NULL;
661         size_t count, i;
662
663         if (!lp_locking(fsp->conn->params) ||
664             !lp_posix_locking(fsp->conn->params))
665         {
666                 /*
667                  * No locking or POSIX to worry about or we want POSIX semantics
668                  * which will lose all locks on all fd's open on this dev/inode,
669                  * just close.
670                  */
671                 return close(fsp->fh->fd);
672         }
673
674         if (get_windows_lock_ref_count(fsp)) {
675
676                 /*
677                  * There are outstanding locks on this dev/inode pair on
678                  * other fds. Add our fd to the pending close tdb and set
679                  * fsp->fh->fd to -1.
680                  */
681
682                 add_fd_to_close_entry(fsp);
683                 return 0;
684         }
685
686         /*
687          * No outstanding locks. Get the pending close fd's
688          * from the tdb and close them all.
689          */
690
691         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
692
693         if (count) {
694                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
695                           (unsigned int)count));
696
697                 for(i = 0; i < count; i++) {
698                         if (close(fd_array[i]) == -1) {
699                                 saved_errno = errno;
700                         }
701                 }
702
703                 /*
704                  * Delete all fd's stored in the tdb
705                  * for this dev/inode pair.
706                  */
707
708                 delete_close_entries(fsp);
709         }
710
711         TALLOC_FREE(fd_array);
712
713         /* Don't need a lock ref count on this dev/ino anymore. */
714         delete_windows_lock_ref_count(fsp);
715
716         /*
717          * Finally close the fd associated with this fsp.
718          */
719
720         ret = close(fsp->fh->fd);
721
722         if (ret == 0 && saved_errno != 0) {
723                 errno = saved_errno;
724                 ret = -1;
725         }
726
727         return ret;
728 }
729
730 /****************************************************************************
731  Next - the functions that deal with the mapping CIFS Windows locks onto
732  the underlying system POSIX locks.
733 ****************************************************************************/
734
/*
 * One byte range in the list built while a lock range is split into
 * POSIX lock ranges. Entries are chained into a doubly linked list.
 */

struct lock_list {
	struct lock_list *next;	/* Following entry in the list. */
	struct lock_list *prev;	/* Preceding entry in the list. */
	off_t start;		/* First byte of the range. */
	off_t size;		/* Length of the range. */
};
746
747 /****************************************************************************
748  Create a list of lock ranges that don't overlap a given range. Used in calculating
749  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
750  understand it :-).
751 ****************************************************************************/
752
753 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
754                                                 struct lock_list *lhead,
755                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
756                                                 files_struct *fsp,
757                                                 const struct lock_struct *plocks,
758                                                 int num_locks)
759 {
760         int i;
761
762         /*
763          * Check the current lock list on this dev/inode pair.
764          * Quit if the list is deleted.
765          */
766
767         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
768                 (double)lhead->start, (double)lhead->size ));
769
770         for (i=0; i<num_locks && lhead; i++) {
771                 const struct lock_struct *lock = &plocks[i];
772                 struct lock_list *l_curr;
773
774                 /* Ignore all but read/write locks. */
775                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
776                         continue;
777                 }
778
779                 /* Ignore locks not owned by this process. */
780                 if (!serverid_equal(&lock->context.pid, &lock_ctx->pid)) {
781                         continue;
782                 }
783
784                 /*
785                  * Walk the lock list, checking for overlaps. Note that
786                  * the lock list can expand within this loop if the current
787                  * range being examined needs to be split.
788                  */
789
790                 for (l_curr = lhead; l_curr;) {
791
792                         DEBUG(10,("posix_lock_list: lock: fnum=%llu: start=%.0f,size=%.0f:type=%s",
793                                 (unsigned long long)lock->fnum,
794                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
795
796                         if ( (l_curr->start >= (lock->start + lock->size)) ||
797                                  (lock->start >= (l_curr->start + l_curr->size))) {
798
799                                 /* No overlap with existing lock - leave this range alone. */
800 /*********************************************
801                                              +---------+
802                                              | l_curr  |
803                                              +---------+
804                                 +-------+
805                                 | lock  |
806                                 +-------+
807 OR....
808              +---------+
809              |  l_curr |
810              +---------+
811 **********************************************/
812
813                                 DEBUG(10,(" no overlap case.\n" ));
814
815                                 l_curr = l_curr->next;
816
817                         } else if ( (l_curr->start >= lock->start) &&
818                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
819
820                                 /*
821                                  * This range is completely overlapped by this existing lock range
822                                  * and thus should have no effect. Delete it from the list.
823                                  */
824 /*********************************************
825                 +---------+
826                 |  l_curr |
827                 +---------+
828         +---------------------------+
829         |       lock                |
830         +---------------------------+
831 **********************************************/
832                                 /* Save the next pointer */
833                                 struct lock_list *ul_next = l_curr->next;
834
835                                 DEBUG(10,(" delete case.\n" ));
836
837                                 DLIST_REMOVE(lhead, l_curr);
838                                 if(lhead == NULL) {
839                                         break; /* No more list... */
840                                 }
841
842                                 l_curr = ul_next;
843                                 
844                         } else if ( (l_curr->start >= lock->start) &&
845                                                 (l_curr->start < lock->start + lock->size) &&
846                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
847
848                                 /*
849                                  * This range overlaps the existing lock range at the high end.
850                                  * Truncate by moving start to existing range end and reducing size.
851                                  */
852 /*********************************************
853                 +---------------+
854                 |  l_curr       |
855                 +---------------+
856         +---------------+
857         |    lock       |
858         +---------------+
859 BECOMES....
860                         +-------+
861                         | l_curr|
862                         +-------+
863 **********************************************/
864
865                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
866                                 l_curr->start = lock->start + lock->size;
867
868                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
869                                                                 (double)l_curr->start, (double)l_curr->size ));
870
871                                 l_curr = l_curr->next;
872
873                         } else if ( (l_curr->start < lock->start) &&
874                                                 (l_curr->start + l_curr->size > lock->start) &&
875                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
876
877                                 /*
878                                  * This range overlaps the existing lock range at the low end.
879                                  * Truncate by reducing size.
880                                  */
881 /*********************************************
882    +---------------+
883    |  l_curr       |
884    +---------------+
885            +---------------+
886            |    lock       |
887            +---------------+
888 BECOMES....
889    +-------+
890    | l_curr|
891    +-------+
892 **********************************************/
893
894                                 l_curr->size = lock->start - l_curr->start;
895
896                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
897                                                                 (double)l_curr->start, (double)l_curr->size ));
898
899                                 l_curr = l_curr->next;
900                 
901                         } else if ( (l_curr->start < lock->start) &&
902                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
903                                 /*
904                                  * Worst case scenario. Range completely overlaps an existing
905                                  * lock range. Split the request into two, push the new (upper) request
906                                  * into the dlink list, and continue with the entry after l_new (as we
907                                  * know that l_new will not overlap with this lock).
908                                  */
909 /*********************************************
910         +---------------------------+
911         |        l_curr             |
912         +---------------------------+
913                 +---------+
914                 | lock    |
915                 +---------+
916 BECOMES.....
917         +-------+         +---------+
918         | l_curr|         | l_new   |
919         +-------+         +---------+
920 **********************************************/
921                                 struct lock_list *l_new = talloc(ctx, struct lock_list);
922
923                                 if(l_new == NULL) {
924                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
925                                         return NULL; /* The talloc_destroy takes care of cleanup. */
926                                 }
927
928                                 ZERO_STRUCTP(l_new);
929                                 l_new->start = lock->start + lock->size;
930                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
931
932                                 /* Truncate the l_curr. */
933                                 l_curr->size = lock->start - l_curr->start;
934
935                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
936 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
937                                                                 (double)l_new->start, (double)l_new->size ));
938
939                                 /*
940                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
941                                  */
942                                 DLIST_ADD_AFTER(lhead, l_new, l_curr);
943
944                                 /* And move after the link we added. */
945                                 l_curr = l_new->next;
946
947                         } else {
948
949                                 /*
950                                  * This logic case should never happen. Ensure this is the
951                                  * case by forcing an abort.... Remove in production.
952                                  */
953                                 char *msg = NULL;
954
955                                 if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
956 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
957                                         smb_panic(msg);
958                                 } else {
959                                         smb_panic("posix_lock_list");
960                                 }
961                         }
962                 } /* end for ( l_curr = lhead; l_curr;) */
963         } /* end for (i=0; i<num_locks && ul_head; i++) */
964
965         return lhead;
966 }
967
968 /****************************************************************************
969  POSIX function to acquire a lock. Returns True if the
970  lock could be granted, False if not.
971 ****************************************************************************/
972
973 bool set_posix_lock_windows_flavour(files_struct *fsp,
974                         uint64_t u_offset,
975                         uint64_t u_count,
976                         enum brl_type lock_type,
977                         const struct lock_context *lock_ctx,
978                         const struct lock_struct *plocks,
979                         int num_locks,
980                         int *errno_ret)
981 {
982         off_t offset;
983         off_t count;
984         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
985         bool ret = True;
986         size_t lock_count;
987         TALLOC_CTX *l_ctx = NULL;
988         struct lock_list *llist = NULL;
989         struct lock_list *ll = NULL;
990
991         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
992                  "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
993                  (double)u_offset, (double)u_count,
994                  posix_lock_type_name(lock_type)));
995
996         /*
997          * If the requested lock won't fit in the POSIX range, we will
998          * pretend it was successful.
999          */
1000
1001         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1002                 increment_windows_lock_ref_count(fsp);
1003                 return True;
1004         }
1005
1006         /*
1007          * Windows is very strange. It allows read locks to be overlayed
1008          * (even over a write lock), but leaves the write lock in force until the first
1009          * unlock. It also reference counts the locks. This means the following sequence :
1010          *
1011          * process1                                      process2
1012          * ------------------------------------------------------------------------
1013          * WRITE LOCK : start = 2, len = 10
1014          *                                            READ LOCK: start =0, len = 10 - FAIL
1015          * READ LOCK : start = 0, len = 14 
1016          *                                            READ LOCK: start =0, len = 10 - FAIL
1017          * UNLOCK : start = 2, len = 10
1018          *                                            READ LOCK: start =0, len = 10 - OK
1019          *
1020          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
1021          * would leave a single read lock over the 0-14 region.
1022          */
1023         
1024         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
1025                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
1026                 return False;
1027         }
1028
1029         if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
1030                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1031                 talloc_destroy(l_ctx);
1032                 return False;
1033         }
1034
1035         /*
1036          * Create the initial list entry containing the
1037          * lock we want to add.
1038          */
1039
1040         ZERO_STRUCTP(ll);
1041         ll->start = offset;
1042         ll->size = count;
1043
1044         DLIST_ADD(llist, ll);
1045
1046         /*
1047          * The following call calculates if there are any
1048          * overlapping locks held by this process on
1049          * fd's open on the same file and splits this list
1050          * into a list of lock ranges that do not overlap with existing
1051          * POSIX locks.
1052          */
1053
1054         llist = posix_lock_list(l_ctx,
1055                                 llist,
1056                                 lock_ctx, /* Lock context llist belongs to. */
1057                                 fsp,
1058                                 plocks,
1059                                 num_locks);
1060
1061         /*
1062          * Add the POSIX locks on the list of ranges returned.
1063          * As the lock is supposed to be added atomically, we need to
1064          * back out all the locks if any one of these calls fail.
1065          */
1066
1067         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1068                 offset = ll->start;
1069                 count = ll->size;
1070
1071                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1072                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1073
1074                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1075                         *errno_ret = errno;
1076                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1077                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1078                         ret = False;
1079                         break;
1080                 }
1081         }
1082
1083         if (!ret) {
1084
1085                 /*
1086                  * Back out all the POSIX locks we have on fail.
1087                  */
1088
1089                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1090                         offset = ll->start;
1091                         count = ll->size;
1092
1093                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1094                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1095
1096                         posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK);
1097                 }
1098         } else {
1099                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1100                 increment_windows_lock_ref_count(fsp);
1101         }
1102
1103         talloc_destroy(l_ctx);
1104         return ret;
1105 }
1106
1107 /****************************************************************************
1108  POSIX function to release a lock. Returns True if the
1109  lock could be released, False if not.
1110 ****************************************************************************/
1111
1112 bool release_posix_lock_windows_flavour(files_struct *fsp,
1113                                 uint64_t u_offset,
1114                                 uint64_t u_count,
1115                                 enum brl_type deleted_lock_type,
1116                                 const struct lock_context *lock_ctx,
1117                                 const struct lock_struct *plocks,
1118                                 int num_locks)
1119 {
1120         off_t offset;
1121         off_t count;
1122         bool ret = True;
1123         TALLOC_CTX *ul_ctx = NULL;
1124         struct lock_list *ulist = NULL;
1125         struct lock_list *ul = NULL;
1126
1127         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
1128                  "count = %.0f\n", fsp_str_dbg(fsp),
1129                  (double)u_offset, (double)u_count));
1130
1131         /* Remember the number of Windows locks we have on this dev/ino pair. */
1132         decrement_windows_lock_ref_count(fsp);
1133
1134         /*
1135          * If the requested lock won't fit in the POSIX range, we will
1136          * pretend it was successful.
1137          */
1138
1139         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1140                 return True;
1141         }
1142
1143         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1144                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1145                 return False;
1146         }
1147
1148         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1149                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1150                 talloc_destroy(ul_ctx);
1151                 return False;
1152         }
1153
1154         /*
1155          * Create the initial list entry containing the
1156          * lock we want to remove.
1157          */
1158
1159         ZERO_STRUCTP(ul);
1160         ul->start = offset;
1161         ul->size = count;
1162
1163         DLIST_ADD(ulist, ul);
1164
1165         /*
1166          * The following call calculates if there are any
1167          * overlapping locks held by this process on
1168          * fd's open on the same file and creates a
1169          * list of unlock ranges that will allow
1170          * POSIX lock ranges to remain on the file whilst the
1171          * unlocks are performed.
1172          */
1173
1174         ulist = posix_lock_list(ul_ctx,
1175                                 ulist,
1176                                 lock_ctx, /* Lock context ulist belongs to. */
1177                                 fsp,
1178                                 plocks,
1179                                 num_locks);
1180
1181         /*
1182          * If there were any overlapped entries (list is > 1 or size or start have changed),
1183          * and the lock_type we just deleted from
1184          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1185          * the POSIX lock to a read lock. This allows any overlapping read locks
1186          * to be atomically maintained.
1187          */
1188
1189         if (deleted_lock_type == WRITE_LOCK &&
1190                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1191
1192                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1193                         (double)offset, (double)count ));
1194
1195                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_RDLCK)) {
1196                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1197                         talloc_destroy(ul_ctx);
1198                         return False;
1199                 }
1200         }
1201
1202         /*
1203          * Release the POSIX locks on the list of ranges returned.
1204          */
1205
1206         for(; ulist; ulist = ulist->next) {
1207                 offset = ulist->start;
1208                 count = ulist->size;
1209
1210                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1211                         (double)offset, (double)count ));
1212
1213                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1214                         ret = False;
1215                 }
1216         }
1217
1218         talloc_destroy(ul_ctx);
1219         return ret;
1220 }
1221
1222 /****************************************************************************
1223  Next - the functions that deal with mapping CIFS POSIX locks onto
1224  the underlying system POSIX locks.
1225 ****************************************************************************/
1226
1227 /****************************************************************************
1228  POSIX function to acquire a lock. Returns True if the
1229  lock could be granted, False if not.
1230  As POSIX locks don't stack or conflict (they just overwrite)
1231  we can map the requested lock directly onto a system one. We
1232  know it doesn't conflict with locks on other contexts as the
1233  upper layer would have refused it.
1234 ****************************************************************************/
1235
1236 bool set_posix_lock_posix_flavour(files_struct *fsp,
1237                         uint64_t u_offset,
1238                         uint64_t u_count,
1239                         enum brl_type lock_type,
1240                         int *errno_ret)
1241 {
1242         off_t offset;
1243         off_t count;
1244         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1245
1246         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
1247                  "= %.0f, type = %s\n", fsp_str_dbg(fsp),
1248                  (double)u_offset, (double)u_count,
1249                  posix_lock_type_name(lock_type)));
1250
1251         /*
1252          * If the requested lock won't fit in the POSIX range, we will
1253          * pretend it was successful.
1254          */
1255
1256         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1257                 return True;
1258         }
1259
1260         if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,posix_lock_type)) {
1261                 *errno_ret = errno;
1262                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1263                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1264                 return False;
1265         }
1266         return True;
1267 }
1268
1269 /****************************************************************************
1270  POSIX function to release a lock. Returns True if the
1271  lock could be released, False if not.
1272  We are given a complete lock state from the upper layer which is what the lock
1273  state should be after the unlock has already been done, so what
1274  we do is punch out holes in the unlock range where locks owned by this process
1275  have a different lock context.
1276 ****************************************************************************/
1277
1278 bool release_posix_lock_posix_flavour(files_struct *fsp,
1279                                 uint64_t u_offset,
1280                                 uint64_t u_count,
1281                                 const struct lock_context *lock_ctx,
1282                                 const struct lock_struct *plocks,
1283                                 int num_locks)
1284 {
1285         bool ret = True;
1286         off_t offset;
1287         off_t count;
1288         TALLOC_CTX *ul_ctx = NULL;
1289         struct lock_list *ulist = NULL;
1290         struct lock_list *ul = NULL;
1291
1292         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
1293                  "count = %.0f\n", fsp_str_dbg(fsp),
1294                  (double)u_offset, (double)u_count));
1295
1296         /*
1297          * If the requested lock won't fit in the POSIX range, we will
1298          * pretend it was successful.
1299          */
1300
1301         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1302                 return True;
1303         }
1304
1305         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1306                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1307                 return False;
1308         }
1309
1310         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1311                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1312                 talloc_destroy(ul_ctx);
1313                 return False;
1314         }
1315
1316         /*
1317          * Create the initial list entry containing the
1318          * lock we want to remove.
1319          */
1320
1321         ZERO_STRUCTP(ul);
1322         ul->start = offset;
1323         ul->size = count;
1324
1325         DLIST_ADD(ulist, ul);
1326
1327         /*
1328          * Walk the given array creating a linked list
1329          * of unlock requests.
1330          */
1331
1332         ulist = posix_lock_list(ul_ctx,
1333                                 ulist,
1334                                 lock_ctx, /* Lock context ulist belongs to. */
1335                                 fsp,
1336                                 plocks,
1337                                 num_locks);
1338
1339         /*
1340          * Release the POSIX locks on the list of ranges returned.
1341          */
1342
1343         for(; ulist; ulist = ulist->next) {
1344                 offset = ulist->start;
1345                 count = ulist->size;
1346
1347                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1348                         (double)offset, (double)count ));
1349
1350                 if (!posix_fcntl_lock(fsp,F_SETLK,offset,count,F_UNLCK)) {
1351                         ret = False;
1352                 }
1353         }
1354
1355         talloc_destroy(ul_ctx);
1356         return ret;
1357 }