tdb_compat: Higher level API fixes.
[kai/samba.git] / source3 / locking / posix.c
1 /* 
2    Unix SMB/CIFS implementation.
3    Locking functions
4    Copyright (C) Jeremy Allison 1992-2006
5    
6    This program is free software; you can redistribute it and/or modify
7    it under the terms of the GNU General Public License as published by
8    the Free Software Foundation; either version 3 of the License, or
9    (at your option) any later version.
10    
11    This program is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14    GNU General Public License for more details.
15    
16    You should have received a copy of the GNU General Public License
17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
18
19    Revision History:
20
21    POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000.
22 */
23
24 #include "includes.h"
25 #include "system/filesys.h"
26 #include "locking/proto.h"
27 #include "dbwrap.h"
28 #include "util_tdb.h"
29
30 #undef DBGC_CLASS
31 #define DBGC_CLASS DBGC_LOCKING
32
/*
 * Handle for the in-memory database used by the POSIX locking code.
 * It is created by posix_locking_init() with db_open_rbt() and released by
 * posix_locking_end(). Two kinds of record are stored in it: per-file lock
 * reference counts (keyed by struct lock_ref_count_key) and per-file arrays
 * of fds whose close has been deferred (keyed by the bare file_id).
 */

static struct db_context *posix_pending_close_db;
38
39 /****************************************************************************
40  First - the functions that deal with the underlying system locks - these
41  functions are used no matter if we're mapping CIFS Windows locks or CIFS
42  POSIX locks onto POSIX.
43 ****************************************************************************/
44
45 /****************************************************************************
46  Utility function to map a lock type correctly depending on the open
47  mode of a file.
48 ****************************************************************************/
49
50 static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type)
51 {
52         if((lock_type == WRITE_LOCK) && !fsp->can_write) {
53                 /*
54                  * Many UNIX's cannot get a write lock on a file opened read-only.
55                  * Win32 locking semantics allow this.
56                  * Do the best we can and attempt a read-only lock.
57                  */
58                 DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n"));
59                 return F_RDLCK;
60         }
61
62         /*
63          * This return should be the most normal, as we attempt
64          * to always open files read/write.
65          */
66
67         return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK;
68 }
69
/****************************************************************************
 Debugging aid: printable name for an fcntl() lock type.
 F_RDLCK gives "READ", any other value gives "WRITE".
****************************************************************************/

static const char *posix_lock_type_name(int lock_type)
{
        if (lock_type == F_RDLCK) {
                return "READ";
        }
        return "WRITE";
}
78
79 /****************************************************************************
80  Check to see if the given unsigned lock range is within the possible POSIX
81  range. Modifies the given args to be in range if possible, just returns
82  False if not.
83 ****************************************************************************/
84
85 static bool posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out,
86                                 uint64_t u_offset, uint64_t u_count)
87 {
88         SMB_OFF_T offset = (SMB_OFF_T)u_offset;
89         SMB_OFF_T count = (SMB_OFF_T)u_count;
90
91         /*
92          * For the type of system we are, attempt to
93          * find the maximum positive lock offset as an SMB_OFF_T.
94          */
95
96 #if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */
97
98         SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET);
99
100 #elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS)
101
102         /*
103          * In this case SMB_OFF_T is 64 bits,
104          * and the underlying system can handle 64 bit signed locks.
105          */
106
107         SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4);
108         SMB_OFF_T mask = (mask2<<1);
109         SMB_OFF_T max_positive_lock_offset = ~mask;
110
111 #else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
112
113         /*
114          * In this case either SMB_OFF_T is 32 bits,
115          * or the underlying system cannot handle 64 bit signed locks.
116          * All offsets & counts must be 2^31 or less.
117          */
118
119         SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF;
120
121 #endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */
122
123         /*
124          * POSIX locks of length zero mean lock to end-of-file.
125          * Win32 locks of length zero are point probes. Ignore
126          * any Win32 locks of length zero. JRA.
127          */
128
129         if (count == (SMB_OFF_T)0) {
130                 DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n"));
131                 return False;
132         }
133
134         /*
135          * If the given offset was > max_positive_lock_offset then we cannot map this at all
136          * ignore this lock.
137          */
138
139         if (u_offset & ~((uint64_t)max_positive_lock_offset)) {
140                 DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n",
141                                 (double)u_offset, (double)((uint64_t)max_positive_lock_offset) ));
142                 return False;
143         }
144
145         /*
146          * We must truncate the count to less than max_positive_lock_offset.
147          */
148
149         if (u_count & ~((uint64_t)max_positive_lock_offset)) {
150                 count = max_positive_lock_offset;
151         }
152
153         /*
154          * Truncate count to end at max lock offset.
155          */
156
157         if (offset + count < 0 || offset + count > max_positive_lock_offset) {
158                 count = max_positive_lock_offset - offset;
159         }
160
161         /*
162          * If we ate all the count, ignore this lock.
163          */
164
165         if (count == 0) {
166                 DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n",
167                                 (double)u_offset, (double)u_count ));
168                 return False;
169         }
170
171         /*
172          * The mapping was successful.
173          */
174
175         DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n",
176                         (double)offset, (double)count ));
177
178         *offset_out = offset;
179         *count_out = count;
180         
181         return True;
182 }
183
184 bool smb_vfs_call_lock(struct vfs_handle_struct *handle,
185                        struct files_struct *fsp, int op, SMB_OFF_T offset,
186                        SMB_OFF_T count, int type)
187 {
188         VFS_FIND(lock);
189         return handle->fns->lock(handle, fsp, op, offset, count, type);
190 }
191
192 /****************************************************************************
193  Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and
194  broken NFS implementations.
195 ****************************************************************************/
196
197 static bool posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type)
198 {
199         bool ret;
200
201         DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type));
202
203         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
204
205         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
206
207                 DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n",
208                                         (double)offset,(double)count));
209                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
210                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
211
212                 /*
213                  * If the offset is > 0x7FFFFFFF then this will cause problems on
214                  * 32 bit NFS mounted filesystems. Just ignore it.
215                  */
216
217                 if (offset & ~((SMB_OFF_T)0x7fffffff)) {
218                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
219                         return True;
220                 }
221
222                 if (count & ~((SMB_OFF_T)0x7fffffff)) {
223                         /* 32 bit NFS file system, retry with smaller offset */
224                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
225                         errno = 0;
226                         count &= 0x7fffffff;
227                         ret = SMB_VFS_LOCK(fsp, op, offset, count, type);
228                 }
229         }
230
231         DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed"));
232         return ret;
233 }
234
235 bool smb_vfs_call_getlock(struct vfs_handle_struct *handle,
236                           struct files_struct *fsp, SMB_OFF_T *poffset,
237                           SMB_OFF_T *pcount, int *ptype, pid_t *ppid)
238 {
239         VFS_FIND(getlock);
240         return handle->fns->getlock(handle, fsp, poffset, pcount, ptype, ppid);
241 }
242
243 /****************************************************************************
244  Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and
245  broken NFS implementations.
246 ****************************************************************************/
247
248 static bool posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype)
249 {
250         pid_t pid;
251         bool ret;
252
253         DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n",
254                 fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype));
255
256         ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid);
257
258         if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno ==  EINVAL))) {
259
260                 DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n",
261                                         (double)*poffset,(double)*pcount));
262                 DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno)));
263                 DEBUGADD(0,("on 32 bit NFS mounted file systems.\n"));
264
265                 /*
266                  * If the offset is > 0x7FFFFFFF then this will cause problems on
267                  * 32 bit NFS mounted filesystems. Just ignore it.
268                  */
269
270                 if (*poffset & ~((SMB_OFF_T)0x7fffffff)) {
271                         DEBUG(0,("Offset greater than 31 bits. Returning success.\n"));
272                         return True;
273                 }
274
275                 if (*pcount & ~((SMB_OFF_T)0x7fffffff)) {
276                         /* 32 bit NFS file system, retry with smaller offset */
277                         DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n"));
278                         errno = 0;
279                         *pcount &= 0x7fffffff;
280                         ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid);
281                 }
282         }
283
284         DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed"));
285         return ret;
286 }
287
288 /****************************************************************************
289  POSIX function to see if a file region is locked. Returns True if the
290  region is locked, False otherwise.
291 ****************************************************************************/
292
293 bool is_posix_locked(files_struct *fsp,
294                         uint64_t *pu_offset,
295                         uint64_t *pu_count,
296                         enum brl_type *plock_type,
297                         enum brl_flavour lock_flav)
298 {
299         SMB_OFF_T offset;
300         SMB_OFF_T count;
301         int posix_lock_type = map_posix_lock_type(fsp,*plock_type);
302
303         DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, "
304                   "type = %s\n", fsp_str_dbg(fsp), (double)*pu_offset,
305                   (double)*pu_count,  posix_lock_type_name(*plock_type)));
306
307         /*
308          * If the requested lock won't fit in the POSIX range, we will
309          * never set it, so presume it is not locked.
310          */
311
312         if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) {
313                 return False;
314         }
315
316         if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) {
317                 return False;
318         }
319
320         if (posix_lock_type == F_UNLCK) {
321                 return False;
322         }
323
324         if (lock_flav == POSIX_LOCK) {
325                 /* Only POSIX lock queries need to know the details. */
326                 *pu_offset = (uint64_t)offset;
327                 *pu_count = (uint64_t)count;
328                 *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK;
329         }
330         return True;
331 }
332
333 /****************************************************************************
334  Next - the functions that deal with in memory database storing representations
335  of either Windows CIFS locks or POSIX CIFS locks.
336 ****************************************************************************/
337
/*
 * Key of a lock reference count record in posix_pending_close_db.
 * locking_ref_count_key_fsp() zeroes the structure, then stores the file's
 * file_id in it together with the tag byte 'r'. The whole structure is used
 * as the key, so it differs from the bare file_id key built by
 * fd_array_key_fsp() for the pending close fd array.
 */

struct lock_ref_count_key {
        struct file_id id;      /* file the reference count belongs to */
        char r;                 /* tag byte, set to 'r' */
};
344
345 /*******************************************************************
346  Form a static locking key for a dev/inode pair for the lock ref count
347 ******************************************************************/
348
349 static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp,
350                                           struct lock_ref_count_key *tmp)
351 {
352         ZERO_STRUCTP(tmp);
353         tmp->id = fsp->file_id;
354         tmp->r = 'r';
355         return make_tdb_data((uint8_t *)tmp, sizeof(*tmp));
356 }
357
358 /*******************************************************************
359  Convenience function to get an fd_array key from an fsp.
360 ******************************************************************/
361
362 static TDB_DATA fd_array_key_fsp(files_struct *fsp)
363 {
364         return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id));
365 }
366
367 /*******************************************************************
368  Create the in-memory POSIX lock databases.
369 ********************************************************************/
370
371 bool posix_locking_init(bool read_only)
372 {
373         if (posix_pending_close_db != NULL) {
374                 return true;
375         }
376
377         posix_pending_close_db = db_open_rbt(NULL);
378
379         if (posix_pending_close_db == NULL) {
380                 DEBUG(0,("Failed to open POSIX pending close database.\n"));
381                 return false;
382         }
383
384         return true;
385 }
386
387 /*******************************************************************
388  Delete the in-memory POSIX lock databases.
389 ********************************************************************/
390
391 bool posix_locking_end(void)
392 {
393         /*
394          * Shouldn't we close all fd's here?
395          */
396         TALLOC_FREE(posix_pending_close_db);
397         return true;
398 }
399
400 /****************************************************************************
401  Next - the functions that deal with storing fd's that have outstanding
402  POSIX locks when closed.
403 ****************************************************************************/
404
/****************************************************************************
 posix_pending_close_db holds up to two records per dev/ino pair:

  - The lock ref count record, keyed by a struct lock_ref_count_key (the
    file_id tagged with 'r'). Its value is a single int, the number of
    outstanding Windows locks on all open fd's on this dev/ino pair.

  - The pending close record, keyed by the bare file_id. Its value is an
    array of ints, the fd's that were open on this dev/ino pair that should
    have been closed, but can't be as the lock ref count is non zero.
****************************************************************************/
413
414 /****************************************************************************
415  Keep a reference count of the number of Windows locks open on this dev/ino
416  pair. Creates entry if it doesn't exist.
417 ****************************************************************************/
418
419 static void increment_windows_lock_ref_count(files_struct *fsp)
420 {
421         struct lock_ref_count_key tmp;
422         struct db_record *rec;
423         int lock_ref_count = 0;
424         NTSTATUS status;
425
426         rec = posix_pending_close_db->fetch_locked(
427                 posix_pending_close_db, talloc_tos(),
428                 locking_ref_count_key_fsp(fsp, &tmp));
429
430         SMB_ASSERT(rec != NULL);
431
432         if (rec->value.dptr != NULL) {
433                 SMB_ASSERT(rec->value.dsize == sizeof(lock_ref_count));
434                 memcpy(&lock_ref_count, rec->value.dptr,
435                        sizeof(lock_ref_count));
436         }
437
438         lock_ref_count++;
439
440         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
441                                                sizeof(lock_ref_count)), 0);
442
443         SMB_ASSERT(NT_STATUS_IS_OK(status));
444
445         TALLOC_FREE(rec);
446
447         DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n",
448                   fsp_str_dbg(fsp), lock_ref_count));
449 }
450
451 /****************************************************************************
452  Bulk delete - subtract as many locks as we've just deleted.
453 ****************************************************************************/
454
455 void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount)
456 {
457         struct lock_ref_count_key tmp;
458         struct db_record *rec;
459         int lock_ref_count = 0;
460         NTSTATUS status;
461
462         rec = posix_pending_close_db->fetch_locked(
463                 posix_pending_close_db, talloc_tos(),
464                 locking_ref_count_key_fsp(fsp, &tmp));
465
466         SMB_ASSERT((rec != NULL)
467                    && (rec->value.dptr != NULL)
468                    && (rec->value.dsize == sizeof(lock_ref_count)));
469
470         memcpy(&lock_ref_count, rec->value.dptr, sizeof(lock_ref_count));
471
472         SMB_ASSERT(lock_ref_count > 0);
473
474         lock_ref_count -= dcount;
475
476         status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count,
477                                                sizeof(lock_ref_count)), 0);
478
479         SMB_ASSERT(NT_STATUS_IS_OK(status));
480
481         TALLOC_FREE(rec);
482
483         DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n",
484                   fsp_str_dbg(fsp), lock_ref_count));
485 }
486
487 static void decrement_windows_lock_ref_count(files_struct *fsp)
488 {
489         reduce_windows_lock_ref_count(fsp, 1);
490 }
491
492 /****************************************************************************
493  Fetch the lock ref count.
494 ****************************************************************************/
495
496 static int get_windows_lock_ref_count(files_struct *fsp)
497 {
498         struct lock_ref_count_key tmp;
499         TDB_DATA dbuf;
500         int res;
501         int lock_ref_count = 0;
502
503         res = posix_pending_close_db->fetch(
504                 posix_pending_close_db, talloc_tos(),
505                 locking_ref_count_key_fsp(fsp, &tmp), &dbuf);
506
507         SMB_ASSERT(res == 0);
508
509         if (dbuf.dsize != 0) {
510                 SMB_ASSERT(dbuf.dsize == sizeof(lock_ref_count));
511                 memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count));
512                 TALLOC_FREE(dbuf.dptr);
513         }
514
515         DEBUG(10,("get_windows_lock_count for file %s = %d\n",
516                   fsp_str_dbg(fsp), lock_ref_count));
517
518         return lock_ref_count;
519 }
520
521 /****************************************************************************
522  Delete a lock_ref_count entry.
523 ****************************************************************************/
524
525 static void delete_windows_lock_ref_count(files_struct *fsp)
526 {
527         struct lock_ref_count_key tmp;
528         struct db_record *rec;
529
530         rec = posix_pending_close_db->fetch_locked(
531                 posix_pending_close_db, talloc_tos(),
532                 locking_ref_count_key_fsp(fsp, &tmp));
533
534         SMB_ASSERT(rec != NULL);
535
536         /* Not a bug if it doesn't exist - no locks were ever granted. */
537
538         rec->delete_rec(rec);
539         TALLOC_FREE(rec);
540
541         DEBUG(10,("delete_windows_lock_ref_count for file %s\n",
542                   fsp_str_dbg(fsp)));
543 }
544
545 /****************************************************************************
546  Add an fd to the pending close tdb.
547 ****************************************************************************/
548
549 static void add_fd_to_close_entry(files_struct *fsp)
550 {
551         struct db_record *rec;
552         uint8_t *new_data;
553         NTSTATUS status;
554
555         rec = posix_pending_close_db->fetch_locked(
556                 posix_pending_close_db, talloc_tos(),
557                 fd_array_key_fsp(fsp));
558
559         SMB_ASSERT(rec != NULL);
560
561         new_data = talloc_array(
562                 rec, uint8_t, rec->value.dsize + sizeof(fsp->fh->fd));
563
564         SMB_ASSERT(new_data != NULL);
565
566         memcpy(new_data, rec->value.dptr, rec->value.dsize);
567         memcpy(new_data + rec->value.dsize,
568                &fsp->fh->fd, sizeof(fsp->fh->fd));
569
570         status = rec->store(
571                 rec, make_tdb_data(new_data,
572                                    rec->value.dsize + sizeof(fsp->fh->fd)), 0);
573
574         SMB_ASSERT(NT_STATUS_IS_OK(status));
575
576         TALLOC_FREE(rec);
577
578         DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n",
579                   fsp->fh->fd, fsp_str_dbg(fsp)));
580 }
581
582 /****************************************************************************
583  Remove all fd entries for a specific dev/inode pair from the tdb.
584 ****************************************************************************/
585
586 static void delete_close_entries(files_struct *fsp)
587 {
588         struct db_record *rec;
589
590         rec = posix_pending_close_db->fetch_locked(
591                 posix_pending_close_db, talloc_tos(),
592                 fd_array_key_fsp(fsp));
593
594         SMB_ASSERT(rec != NULL);
595         rec->delete_rec(rec);
596         TALLOC_FREE(rec);
597 }
598
599 /****************************************************************************
600  Get the array of POSIX pending close records for an open fsp. Returns number
601  of entries.
602 ****************************************************************************/
603
604 static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx,
605                                               files_struct *fsp, int **entries)
606 {
607         TDB_DATA dbuf;
608         int res;
609
610         res = posix_pending_close_db->fetch(
611                 posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp),
612                 &dbuf);
613
614         SMB_ASSERT(res == 0);
615
616         if (dbuf.dsize == 0) {
617                 *entries = NULL;
618                 return 0;
619         }
620
621         *entries = (int *)dbuf.dptr;
622         return (size_t)(dbuf.dsize / sizeof(int));
623 }
624
625 /****************************************************************************
626  Deal with pending closes needed by POSIX locking support.
627  Note that posix_locking_close_file() is expected to have been called
628  to delete all locks on this fsp before this function is called.
629 ****************************************************************************/
630
631 int fd_close_posix(struct files_struct *fsp)
632 {
633         int saved_errno = 0;
634         int ret;
635         int *fd_array = NULL;
636         size_t count, i;
637
638         if (!lp_locking(fsp->conn->params) ||
639             !lp_posix_locking(fsp->conn->params))
640         {
641                 /*
642                  * No locking or POSIX to worry about or we want POSIX semantics
643                  * which will lose all locks on all fd's open on this dev/inode,
644                  * just close.
645                  */
646                 return close(fsp->fh->fd);
647         }
648
649         if (get_windows_lock_ref_count(fsp)) {
650
651                 /*
652                  * There are outstanding locks on this dev/inode pair on
653                  * other fds. Add our fd to the pending close tdb and set
654                  * fsp->fh->fd to -1.
655                  */
656
657                 add_fd_to_close_entry(fsp);
658                 return 0;
659         }
660
661         /*
662          * No outstanding locks. Get the pending close fd's
663          * from the tdb and close them all.
664          */
665
666         count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array);
667
668         if (count) {
669                 DEBUG(10,("fd_close_posix: doing close on %u fd's.\n",
670                           (unsigned int)count));
671
672                 for(i = 0; i < count; i++) {
673                         if (close(fd_array[i]) == -1) {
674                                 saved_errno = errno;
675                         }
676                 }
677
678                 /*
679                  * Delete all fd's stored in the tdb
680                  * for this dev/inode pair.
681                  */
682
683                 delete_close_entries(fsp);
684         }
685
686         TALLOC_FREE(fd_array);
687
688         /* Don't need a lock ref count on this dev/ino anymore. */
689         delete_windows_lock_ref_count(fsp);
690
691         /*
692          * Finally close the fd associated with this fsp.
693          */
694
695         ret = close(fsp->fh->fd);
696
697         if (ret == 0 && saved_errno != 0) {
698                 errno = saved_errno;
699                 ret = -1;
700         }
701
702         return ret;
703 }
704
705 /****************************************************************************
706  Next - the functions that deal with the mapping CIFS Windows locks onto
707  the underlying system POSIX locks.
708 ****************************************************************************/
709
/*
 * Structure used when splitting a lock range
 * into a POSIX lock range. Doubly linked list.
 * Each element describes one byte range.
 */

struct lock_list {
        struct lock_list *next;         /* following element, if any */
        struct lock_list *prev;         /* preceding element, if any */
        SMB_OFF_T start;                /* first byte of the range */
        SMB_OFF_T size;                 /* length of the range in bytes */
};
721
722 /****************************************************************************
723  Create a list of lock ranges that don't overlap a given range. Used in calculating
724  POSIX locks and unlocks. This is a difficult function that requires ASCII art to
725  understand it :-).
726 ****************************************************************************/
727
728 static struct lock_list *posix_lock_list(TALLOC_CTX *ctx,
729                                                 struct lock_list *lhead,
730                                                 const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */
731                                                 files_struct *fsp,
732                                                 const struct lock_struct *plocks,
733                                                 int num_locks)
734 {
735         int i;
736
737         /*
738          * Check the current lock list on this dev/inode pair.
739          * Quit if the list is deleted.
740          */
741
742         DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n",
743                 (double)lhead->start, (double)lhead->size ));
744
745         for (i=0; i<num_locks && lhead; i++) {
746                 const struct lock_struct *lock = &plocks[i];
747                 struct lock_list *l_curr;
748
749                 /* Ignore all but read/write locks. */
750                 if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) {
751                         continue;
752                 }
753
754                 /* Ignore locks not owned by this process. */
755                 if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) {
756                         continue;
757                 }
758
759                 /*
760                  * Walk the lock list, checking for overlaps. Note that
761                  * the lock list can expand within this loop if the current
762                  * range being examined needs to be split.
763                  */
764
765                 for (l_curr = lhead; l_curr;) {
766
767                         DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum,
768                                 (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) ));
769
770                         if ( (l_curr->start >= (lock->start + lock->size)) ||
771                                  (lock->start >= (l_curr->start + l_curr->size))) {
772
773                                 /* No overlap with existing lock - leave this range alone. */
774 /*********************************************
775                                              +---------+
776                                              | l_curr  |
777                                              +---------+
778                                 +-------+
779                                 | lock  |
780                                 +-------+
781 OR....
782              +---------+
783              |  l_curr |
784              +---------+
785 **********************************************/
786
787                                 DEBUG(10,(" no overlap case.\n" ));
788
789                                 l_curr = l_curr->next;
790
791                         } else if ( (l_curr->start >= lock->start) &&
792                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
793
794                                 /*
795                                  * This range is completely overlapped by this existing lock range
796                                  * and thus should have no effect. Delete it from the list.
797                                  */
798 /*********************************************
799                 +---------+
800                 |  l_curr |
801                 +---------+
802         +---------------------------+
803         |       lock                |
804         +---------------------------+
805 **********************************************/
806                                 /* Save the next pointer */
807                                 struct lock_list *ul_next = l_curr->next;
808
809                                 DEBUG(10,(" delete case.\n" ));
810
811                                 DLIST_REMOVE(lhead, l_curr);
812                                 if(lhead == NULL) {
813                                         break; /* No more list... */
814                                 }
815
816                                 l_curr = ul_next;
817                                 
818                         } else if ( (l_curr->start >= lock->start) &&
819                                                 (l_curr->start < lock->start + lock->size) &&
820                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
821
822                                 /*
823                                  * This range overlaps the existing lock range at the high end.
824                                  * Truncate by moving start to existing range end and reducing size.
825                                  */
826 /*********************************************
827                 +---------------+
828                 |  l_curr       |
829                 +---------------+
830         +---------------+
831         |    lock       |
832         +---------------+
833 BECOMES....
834                         +-------+
835                         | l_curr|
836                         +-------+
837 **********************************************/
838
839                                 l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size);
840                                 l_curr->start = lock->start + lock->size;
841
842                                 DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n",
843                                                                 (double)l_curr->start, (double)l_curr->size ));
844
845                                 l_curr = l_curr->next;
846
847                         } else if ( (l_curr->start < lock->start) &&
848                                                 (l_curr->start + l_curr->size > lock->start) &&
849                                                 (l_curr->start + l_curr->size <= lock->start + lock->size) ) {
850
851                                 /*
852                                  * This range overlaps the existing lock range at the low end.
853                                  * Truncate by reducing size.
854                                  */
855 /*********************************************
856    +---------------+
857    |  l_curr       |
858    +---------------+
859            +---------------+
860            |    lock       |
861            +---------------+
862 BECOMES....
863    +-------+
864    | l_curr|
865    +-------+
866 **********************************************/
867
868                                 l_curr->size = lock->start - l_curr->start;
869
870                                 DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n",
871                                                                 (double)l_curr->start, (double)l_curr->size ));
872
873                                 l_curr = l_curr->next;
874                 
875                         } else if ( (l_curr->start < lock->start) &&
876                                                 (l_curr->start + l_curr->size > lock->start + lock->size) ) {
877                                 /*
878                                  * Worst case scenario. Range completely overlaps an existing
879                                  * lock range. Split the request into two, push the new (upper) request
880                                  * into the dlink list, and continue with the entry after l_new (as we
881                                  * know that l_new will not overlap with this lock).
882                                  */
883 /*********************************************
884         +---------------------------+
885         |        l_curr             |
886         +---------------------------+
887                 +---------+
888                 | lock    |
889                 +---------+
890 BECOMES.....
891         +-------+         +---------+
892         | l_curr|         | l_new   |
893         +-------+         +---------+
894 **********************************************/
895                                 struct lock_list *l_new = talloc(ctx, struct lock_list);
896
897                                 if(l_new == NULL) {
898                                         DEBUG(0,("posix_lock_list: talloc fail.\n"));
899                                         return NULL; /* The talloc_destroy takes care of cleanup. */
900                                 }
901
902                                 ZERO_STRUCTP(l_new);
903                                 l_new->start = lock->start + lock->size;
904                                 l_new->size = l_curr->start + l_curr->size - l_new->start;
905
906                                 /* Truncate the l_curr. */
907                                 l_curr->size = lock->start - l_curr->start;
908
909                                 DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \
910 new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size,
911                                                                 (double)l_new->start, (double)l_new->size ));
912
913                                 /*
914                                  * Add into the dlink list after the l_curr point - NOT at lhead. 
915                                  */
916                                 DLIST_ADD_AFTER(lhead, l_new, l_curr);
917
918                                 /* And move after the link we added. */
919                                 l_curr = l_new->next;
920
921                         } else {
922
923                                 /*
924                                  * This logic case should never happen. Ensure this is the
925                                  * case by forcing an abort.... Remove in production.
926                                  */
927                                 char *msg = NULL;
928
929                                 if (asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
930 lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size ) != -1) {
931                                         smb_panic(msg);
932                                 } else {
933                                         smb_panic("posix_lock_list");
934                                 }
935                         }
936                 } /* end for ( l_curr = lhead; l_curr;) */
937         } /* end for (i=0; i<num_locks && ul_head; i++) */
938
939         return lhead;
940 }
941
942 /****************************************************************************
943  POSIX function to acquire a lock. Returns True if the
944  lock could be granted, False if not.
945 ****************************************************************************/
946
947 bool set_posix_lock_windows_flavour(files_struct *fsp,
948                         uint64_t u_offset,
949                         uint64_t u_count,
950                         enum brl_type lock_type,
951                         const struct lock_context *lock_ctx,
952                         const struct lock_struct *plocks,
953                         int num_locks,
954                         int *errno_ret)
955 {
956         SMB_OFF_T offset;
957         SMB_OFF_T count;
958         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
959         bool ret = True;
960         size_t lock_count;
961         TALLOC_CTX *l_ctx = NULL;
962         struct lock_list *llist = NULL;
963         struct lock_list *ll = NULL;
964
965         DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, "
966                  "count = %.0f, type = %s\n", fsp_str_dbg(fsp),
967                  (double)u_offset, (double)u_count,
968                  posix_lock_type_name(lock_type)));
969
970         /*
971          * If the requested lock won't fit in the POSIX range, we will
972          * pretend it was successful.
973          */
974
975         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
976                 increment_windows_lock_ref_count(fsp);
977                 return True;
978         }
979
980         /*
981          * Windows is very strange. It allows read locks to be overlayed
982          * (even over a write lock), but leaves the write lock in force until the first
983          * unlock. It also reference counts the locks. This means the following sequence :
984          *
985          * process1                                      process2
986          * ------------------------------------------------------------------------
987          * WRITE LOCK : start = 2, len = 10
988          *                                            READ LOCK: start =0, len = 10 - FAIL
989          * READ LOCK : start = 0, len = 14 
990          *                                            READ LOCK: start =0, len = 10 - FAIL
991          * UNLOCK : start = 2, len = 10
992          *                                            READ LOCK: start =0, len = 10 - OK
993          *
994          * Under POSIX, the same sequence in steps 1 and 2 would not be reference counted, but
995          * would leave a single read lock over the 0-14 region.
996          */
997         
998         if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
999                 DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
1000                 return False;
1001         }
1002
1003         if ((ll = talloc(l_ctx, struct lock_list)) == NULL) {
1004                 DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1005                 talloc_destroy(l_ctx);
1006                 return False;
1007         }
1008
1009         /*
1010          * Create the initial list entry containing the
1011          * lock we want to add.
1012          */
1013
1014         ZERO_STRUCTP(ll);
1015         ll->start = offset;
1016         ll->size = count;
1017
1018         DLIST_ADD(llist, ll);
1019
1020         /*
1021          * The following call calculates if there are any
1022          * overlapping locks held by this process on
1023          * fd's open on the same file and splits this list
1024          * into a list of lock ranges that do not overlap with existing
1025          * POSIX locks.
1026          */
1027
1028         llist = posix_lock_list(l_ctx,
1029                                 llist,
1030                                 lock_ctx, /* Lock context llist belongs to. */
1031                                 fsp,
1032                                 plocks,
1033                                 num_locks);
1034
1035         /*
1036          * Add the POSIX locks on the list of ranges returned.
1037          * As the lock is supposed to be added atomically, we need to
1038          * back out all the locks if any one of these calls fail.
1039          */
1040
1041         for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
1042                 offset = ll->start;
1043                 count = ll->size;
1044
1045                 DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
1046                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1047
1048                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1049                         *errno_ret = errno;
1050                         DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1051                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1052                         ret = False;
1053                         break;
1054                 }
1055         }
1056
1057         if (!ret) {
1058
1059                 /*
1060                  * Back out all the POSIX locks we have on fail.
1061                  */
1062
1063                 for (ll = llist; lock_count; ll = ll->next, lock_count--) {
1064                         offset = ll->start;
1065                         count = ll->size;
1066
1067                         DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
1068                                 posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
1069
1070                         posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
1071                 }
1072         } else {
1073                 /* Remember the number of Windows locks we have on this dev/ino pair. */
1074                 increment_windows_lock_ref_count(fsp);
1075         }
1076
1077         talloc_destroy(l_ctx);
1078         return ret;
1079 }
1080
1081 /****************************************************************************
1082  POSIX function to release a lock. Returns True if the
1083  lock could be released, False if not.
1084 ****************************************************************************/
1085
1086 bool release_posix_lock_windows_flavour(files_struct *fsp,
1087                                 uint64_t u_offset,
1088                                 uint64_t u_count,
1089                                 enum brl_type deleted_lock_type,
1090                                 const struct lock_context *lock_ctx,
1091                                 const struct lock_struct *plocks,
1092                                 int num_locks)
1093 {
1094         SMB_OFF_T offset;
1095         SMB_OFF_T count;
1096         bool ret = True;
1097         TALLOC_CTX *ul_ctx = NULL;
1098         struct lock_list *ulist = NULL;
1099         struct lock_list *ul = NULL;
1100
1101         DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, "
1102                  "count = %.0f\n", fsp_str_dbg(fsp),
1103                  (double)u_offset, (double)u_count));
1104
1105         /* Remember the number of Windows locks we have on this dev/ino pair. */
1106         decrement_windows_lock_ref_count(fsp);
1107
1108         /*
1109          * If the requested lock won't fit in the POSIX range, we will
1110          * pretend it was successful.
1111          */
1112
1113         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1114                 return True;
1115         }
1116
1117         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1118                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1119                 return False;
1120         }
1121
1122         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1123                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1124                 talloc_destroy(ul_ctx);
1125                 return False;
1126         }
1127
1128         /*
1129          * Create the initial list entry containing the
1130          * lock we want to remove.
1131          */
1132
1133         ZERO_STRUCTP(ul);
1134         ul->start = offset;
1135         ul->size = count;
1136
1137         DLIST_ADD(ulist, ul);
1138
1139         /*
1140          * The following call calculates if there are any
1141          * overlapping locks held by this process on
1142          * fd's open on the same file and creates a
1143          * list of unlock ranges that will allow
1144          * POSIX lock ranges to remain on the file whilst the
1145          * unlocks are performed.
1146          */
1147
1148         ulist = posix_lock_list(ul_ctx,
1149                                 ulist,
1150                                 lock_ctx, /* Lock context ulist belongs to. */
1151                                 fsp,
1152                                 plocks,
1153                                 num_locks);
1154
1155         /*
1156          * If there were any overlapped entries (list is > 1 or size or start have changed),
1157          * and the lock_type we just deleted from
1158          * the upper layer tdb was a write lock, then before doing the unlock we need to downgrade
1159          * the POSIX lock to a read lock. This allows any overlapping read locks
1160          * to be atomically maintained.
1161          */
1162
1163         if (deleted_lock_type == WRITE_LOCK &&
1164                         (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
1165
1166                 DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
1167                         (double)offset, (double)count ));
1168
1169                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
1170                         DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
1171                         talloc_destroy(ul_ctx);
1172                         return False;
1173                 }
1174         }
1175
1176         /*
1177          * Release the POSIX locks on the list of ranges returned.
1178          */
1179
1180         for(; ulist; ulist = ulist->next) {
1181                 offset = ulist->start;
1182                 count = ulist->size;
1183
1184                 DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1185                         (double)offset, (double)count ));
1186
1187                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1188                         ret = False;
1189                 }
1190         }
1191
1192         talloc_destroy(ul_ctx);
1193         return ret;
1194 }
1195
1196 /****************************************************************************
1197  Next - the functions that deal with mapping CIFS POSIX locks onto
1198  the underlying system POSIX locks.
1199 ****************************************************************************/
1200
1201 /****************************************************************************
1202  POSIX function to acquire a lock. Returns True if the
1203  lock could be granted, False if not.
1204  As POSIX locks don't stack or conflict (they just overwrite)
1205  we can map the requested lock directly onto a system one. We
1206  know it doesn't conflict with locks on other contexts as the
1207  upper layer would have refused it.
1208 ****************************************************************************/
1209
1210 bool set_posix_lock_posix_flavour(files_struct *fsp,
1211                         uint64_t u_offset,
1212                         uint64_t u_count,
1213                         enum brl_type lock_type,
1214                         int *errno_ret)
1215 {
1216         SMB_OFF_T offset;
1217         SMB_OFF_T count;
1218         int posix_lock_type = map_posix_lock_type(fsp,lock_type);
1219
1220         DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count "
1221                  "= %.0f, type = %s\n", fsp_str_dbg(fsp),
1222                  (double)u_offset, (double)u_count,
1223                  posix_lock_type_name(lock_type)));
1224
1225         /*
1226          * If the requested lock won't fit in the POSIX range, we will
1227          * pretend it was successful.
1228          */
1229
1230         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1231                 return True;
1232         }
1233
1234         if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
1235                 *errno_ret = errno;
1236                 DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
1237                         posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
1238                 return False;
1239         }
1240         return True;
1241 }
1242
1243 /****************************************************************************
1244  POSIX function to release a lock. Returns True if the
1245  lock could be released, False if not.
1246  We are given a complete lock state from the upper layer which is what the lock
1247  state should be after the unlock has already been done, so what
1248  we do is punch out holes in the unlock range where locks owned by this process
1249  have a different lock context.
1250 ****************************************************************************/
1251
1252 bool release_posix_lock_posix_flavour(files_struct *fsp,
1253                                 uint64_t u_offset,
1254                                 uint64_t u_count,
1255                                 const struct lock_context *lock_ctx,
1256                                 const struct lock_struct *plocks,
1257                                 int num_locks)
1258 {
1259         bool ret = True;
1260         SMB_OFF_T offset;
1261         SMB_OFF_T count;
1262         TALLOC_CTX *ul_ctx = NULL;
1263         struct lock_list *ulist = NULL;
1264         struct lock_list *ul = NULL;
1265
1266         DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, "
1267                  "count = %.0f\n", fsp_str_dbg(fsp),
1268                  (double)u_offset, (double)u_count));
1269
1270         /*
1271          * If the requested lock won't fit in the POSIX range, we will
1272          * pretend it was successful.
1273          */
1274
1275         if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
1276                 return True;
1277         }
1278
1279         if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
1280                 DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
1281                 return False;
1282         }
1283
1284         if ((ul = talloc(ul_ctx, struct lock_list)) == NULL) {
1285                 DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
1286                 talloc_destroy(ul_ctx);
1287                 return False;
1288         }
1289
1290         /*
1291          * Create the initial list entry containing the
1292          * lock we want to remove.
1293          */
1294
1295         ZERO_STRUCTP(ul);
1296         ul->start = offset;
1297         ul->size = count;
1298
1299         DLIST_ADD(ulist, ul);
1300
1301         /*
1302          * Walk the given array creating a linked list
1303          * of unlock requests.
1304          */
1305
1306         ulist = posix_lock_list(ul_ctx,
1307                                 ulist,
1308                                 lock_ctx, /* Lock context ulist belongs to. */
1309                                 fsp,
1310                                 plocks,
1311                                 num_locks);
1312
1313         /*
1314          * Release the POSIX locks on the list of ranges returned.
1315          */
1316
1317         for(; ulist; ulist = ulist->next) {
1318                 offset = ulist->start;
1319                 count = ulist->size;
1320
1321                 DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
1322                         (double)offset, (double)count ));
1323
1324                 if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
1325                         ret = False;
1326                 }
1327         }
1328
1329         talloc_destroy(ul_ctx);
1330         return ret;
1331 }