drivers/staging/lustre/lustre/obdclass/cl_page.c
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see
 * http://www.gnu.org/licenses/gpl-2.0.html
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2011, 2015, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * Client Lustre Page.
 *
 *   Author: Nikita Danilov <nikita.danilov@sun.com>
 *   Author: Jinshan Xiong <jinshan.xiong@intel.com>
 */

#define DEBUG_SUBSYSTEM S_CLASS

#include "../../include/linux/libcfs/libcfs.h"
#include "../include/obd_class.h"
#include "../include/obd_support.h"
#include <linux/list.h>

#include "../include/cl_object.h"
#include "cl_internal.h"

static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg);

# define PASSERT(env, page, expr)                                          \
        do {                                                               \
                if (unlikely(!(expr))) {                                   \
                        CL_PAGE_DEBUG(D_ERROR, (env), (page), #expr "\n"); \
                        LASSERT(0);                                        \
                }                                                          \
        } while (0)

# define PINVRNT(env, page, exp) \
        ((void)sizeof(env), (void)sizeof(page), (void)sizeof !!(exp))
/**
 * Internal version of cl_page_get().
 *
 * This function can be used to obtain an initial reference to a previously
 * unreferenced cached object. It can be called only if concurrent page
 * reclamation is somehow prevented, e.g., by keeping a lock on the VM page
 * associated with \a page.
 *
 * Use with care! Not exported.
 */
static void cl_page_get_trust(struct cl_page *page)
{
        LASSERT(atomic_read(&page->cp_ref) > 0);
        atomic_inc(&page->cp_ref);
}

/**
 * Returns a slice within a page, corresponding to the given layer in the
 * device stack.
 *
 * \see cl_lock_at()
 */
static const struct cl_page_slice *
cl_page_at_trusted(const struct cl_page *page,
                   const struct lu_device_type *dtype)
{
        const struct cl_page_slice *slice;

        list_for_each_entry(slice, &page->cp_layers, cpl_linkage) {
                if (slice->cpl_obj->co_lu.lo_dev->ld_type == dtype)
                        return slice;
        }
        return NULL;
}

static void cl_page_free(const struct lu_env *env, struct cl_page *page)
{
        struct cl_object *obj  = page->cp_obj;

        PASSERT(env, page, list_empty(&page->cp_batch));
        PASSERT(env, page, !page->cp_owner);
        PASSERT(env, page, page->cp_state == CPS_FREEING);

        while (!list_empty(&page->cp_layers)) {
                struct cl_page_slice *slice;

                slice = list_entry(page->cp_layers.next,
                                   struct cl_page_slice, cpl_linkage);
                list_del_init(page->cp_layers.next);
                if (unlikely(slice->cpl_ops->cpo_fini))
                        slice->cpl_ops->cpo_fini(env, slice);
        }
        lu_object_ref_del_at(&obj->co_lu, &page->cp_obj_ref, "cl_page", page);
        cl_object_put(env, obj);
        lu_ref_fini(&page->cp_reference);
        kfree(page);
}

/**
 * Helper function updating page state. This is the only place in the code
 * where cl_page::cp_state field is mutated.
 */
static inline void cl_page_state_set_trust(struct cl_page *page,
                                           enum cl_page_state state)
{
        /* bypass const. */
        *(enum cl_page_state *)&page->cp_state = state;
}

struct cl_page *cl_page_alloc(const struct lu_env *env,
                              struct cl_object *o, pgoff_t ind,
                              struct page *vmpage,
                              enum cl_page_type type)
{
        struct cl_page    *page;
        struct lu_object_header *head;

        page = kzalloc(cl_object_header(o)->coh_page_bufsize, GFP_NOFS);
        if (page) {
                int result = 0;

                atomic_set(&page->cp_ref, 1);
                page->cp_obj = o;
                cl_object_get(o);
                lu_object_ref_add_at(&o->co_lu, &page->cp_obj_ref, "cl_page",
                                     page);
                page->cp_vmpage = vmpage;
                cl_page_state_set_trust(page, CPS_CACHED);
                page->cp_type = type;
                INIT_LIST_HEAD(&page->cp_layers);
                INIT_LIST_HEAD(&page->cp_batch);
                lu_ref_init(&page->cp_reference);
                head = o->co_lu.lo_header;
                list_for_each_entry(o, &head->loh_layers, co_lu.lo_linkage) {
                        if (o->co_ops->coo_page_init) {
                                result = o->co_ops->coo_page_init(env, o, page,
                                                                  ind);
                                if (result != 0) {
                                        cl_page_delete0(env, page);
                                        cl_page_free(env, page);
                                        page = ERR_PTR(result);
                                        break;
                                }
                        }
                }
        } else {
                page = ERR_PTR(-ENOMEM);
        }
        return page;
}

/**
 * Returns a cl_page with index \a idx at the object \a o, and associated with
 * the VM page \a vmpage.
 *
 * This is the main entry point into the cl_page caching interface. First, a
 * cache (implemented as a per-object radix tree) is consulted. If the page is
 * found there, it is returned immediately. Otherwise, a new page is allocated
 * and returned. In any case, an additional reference to the page is acquired.
 *
 * \see cl_object_find(), cl_lock_find()
 */
struct cl_page *cl_page_find(const struct lu_env *env,
                             struct cl_object *o,
                             pgoff_t idx, struct page *vmpage,
                             enum cl_page_type type)
{
        struct cl_page    *page = NULL;
        struct cl_object_header *hdr;

        LASSERT(type == CPT_CACHEABLE || type == CPT_TRANSIENT);
        might_sleep();

        hdr = cl_object_header(o);

        CDEBUG(D_PAGE, "%lu@" DFID " %p %lx %d\n",
               idx, PFID(&hdr->coh_lu.loh_fid), vmpage, vmpage->private, type);
        /* fast path. */
        if (type == CPT_CACHEABLE) {
                /*
                 * vmpage lock is used to protect the child/parent
                 * relationship
                 */
                KLASSERT(PageLocked(vmpage));
                /*
                 * cl_vmpage_page() can be called here without any locks as
                 *
                 *     - "vmpage" is locked (which prevents ->private from
                 *       concurrent updates), and
                 *
                 *     - "o" cannot be destroyed while current thread holds a
                 *       reference on it.
                 */
                page = cl_vmpage_page(vmpage, o);

                if (page)
                        return page;
        }

        /* allocate and initialize cl_page */
        page = cl_page_alloc(env, o, idx, vmpage, type);
        return page;
}
EXPORT_SYMBOL(cl_page_find);
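
/*
 * Usage sketch (illustrative only, not called anywhere in this file): a
 * caller that already holds the locked VM page, an environment and an IO
 * context would typically pair cl_page_find() with the ownership and
 * reference interfaces defined below.  "env", "io", "obj", "vmpage" and
 * "index" are assumed to be set up by the caller:
 *
 *      struct cl_page *page;
 *
 *      page = cl_page_find(env, obj, index, vmpage, CPT_CACHEABLE);
 *      if (IS_ERR(page))
 *              return PTR_ERR(page);
 *      if (cl_page_own(env, io, page) == 0) {
 *              ... work on the page while it is in CPS_OWNED ...
 *              cl_page_disown(env, io, page);
 *      }
 *      cl_page_put(env, page);
 */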

static inline int cl_page_invariant(const struct cl_page *pg)
{
        return cl_page_in_use_noref(pg);
}

static void cl_page_state_set0(const struct lu_env *env,
                               struct cl_page *page, enum cl_page_state state)
{
        enum cl_page_state old;

        /*
         * Matrix of allowed state transitions [old][new], for sanity
         * checking.
         */
        static const int allowed_transitions[CPS_NR][CPS_NR] = {
                [CPS_CACHED] = {
                        [CPS_CACHED]  = 0,
                        [CPS_OWNED]   = 1, /* io finds existing cached page */
                        [CPS_PAGEIN]  = 0,
                        [CPS_PAGEOUT] = 1, /* write-out from the cache */
                        [CPS_FREEING] = 1, /* eviction on the memory pressure */
                },
                [CPS_OWNED] = {
                        [CPS_CACHED]  = 1, /* release to the cache */
                        [CPS_OWNED]   = 0,
                        [CPS_PAGEIN]  = 1, /* start read immediately */
                        [CPS_PAGEOUT] = 1, /* start write immediately */
                        [CPS_FREEING] = 1, /* lock invalidation or truncate */
                },
                [CPS_PAGEIN] = {
                        [CPS_CACHED]  = 1, /* io completion */
                        [CPS_OWNED]   = 0,
                        [CPS_PAGEIN]  = 0,
                        [CPS_PAGEOUT] = 0,
                        [CPS_FREEING] = 0,
                },
                [CPS_PAGEOUT] = {
                        [CPS_CACHED]  = 1, /* io completion */
                        [CPS_OWNED]   = 0,
                        [CPS_PAGEIN]  = 0,
                        [CPS_PAGEOUT] = 0,
                        [CPS_FREEING] = 0,
                },
                [CPS_FREEING] = {
                        [CPS_CACHED]  = 0,
                        [CPS_OWNED]   = 0,
                        [CPS_PAGEIN]  = 0,
                        [CPS_PAGEOUT] = 0,
                        [CPS_FREEING] = 0,
                }
        };

        old = page->cp_state;
        PASSERT(env, page, allowed_transitions[old][state]);
        CL_PAGE_HEADER(D_TRACE, env, page, "%d -> %d\n", old, state);
        PASSERT(env, page, page->cp_state == old);
        PASSERT(env, page, equi(state == CPS_OWNED, page->cp_owner));
        cl_page_state_set_trust(page, state);
}
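
/*
 * Summary of the matrix above in diagram form (transitions not shown are
 * forbidden); this is only a restatement of allowed_transitions[][]:
 *
 *      CPS_CACHED  --> CPS_OWNED | CPS_PAGEOUT | CPS_FREEING
 *      CPS_OWNED   --> CPS_CACHED | CPS_PAGEIN | CPS_PAGEOUT | CPS_FREEING
 *      CPS_PAGEIN  --> CPS_CACHED
 *      CPS_PAGEOUT --> CPS_CACHED
 *      CPS_FREEING --> (terminal)
 */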

static void cl_page_state_set(const struct lu_env *env,
                              struct cl_page *page, enum cl_page_state state)
{
        cl_page_state_set0(env, page, state);
}

/**
 * Acquires an additional reference to a page.
 *
 * This can be called only by a caller already possessing a reference to \a
 * page.
 *
 * \see cl_object_get(), cl_lock_get().
 */
void cl_page_get(struct cl_page *page)
{
        cl_page_get_trust(page);
}
EXPORT_SYMBOL(cl_page_get);

/**
 * Releases a reference to a page.
 *
 * When the last reference is released, the page is returned to the cache,
 * unless it is in cl_page_state::CPS_FREEING state, in which case it is
 * immediately destroyed.
 *
 * \see cl_object_put(), cl_lock_put().
 */
void cl_page_put(const struct lu_env *env, struct cl_page *page)
{
        CL_PAGE_HEADER(D_TRACE, env, page, "%d\n",
                       atomic_read(&page->cp_ref));

        if (atomic_dec_and_test(&page->cp_ref)) {
                LASSERT(page->cp_state == CPS_FREEING);

                LASSERT(atomic_read(&page->cp_ref) == 0);
                PASSERT(env, page, !page->cp_owner);
                PASSERT(env, page, list_empty(&page->cp_batch));
                /*
                 * Page is no longer reachable by other threads. Tear
                 * it down.
                 */
                cl_page_free(env, page);
        }
}
EXPORT_SYMBOL(cl_page_put);

/**
 * Returns a cl_page associated with a VM page, and given cl_object.
 */
struct cl_page *cl_vmpage_page(struct page *vmpage, struct cl_object *obj)
{
        struct cl_page *page;

        KLASSERT(PageLocked(vmpage));

        /*
         * NOTE: absence of races and liveness of data are guaranteed by page
         *       lock on a "vmpage". That works because object destruction
         *       proceeds bottom-to-top.
         */

        page = (struct cl_page *)vmpage->private;
        if (page) {
                cl_page_get_trust(page);
                LASSERT(page->cp_type == CPT_CACHEABLE);
        }
        return page;
}
EXPORT_SYMBOL(cl_vmpage_page);

const struct cl_page_slice *cl_page_at(const struct cl_page *page,
                                       const struct lu_device_type *dtype)
{
        return cl_page_at_trusted(page, dtype);
}
EXPORT_SYMBOL(cl_page_at);

#define CL_PAGE_OP(opname) offsetof(struct cl_page_operations, opname)

#define CL_PAGE_INVOKE(_env, _page, _op, _proto, ...)                   \
({                                                                      \
        const struct lu_env        *__env  = (_env);                    \
        struct cl_page             *__page = (_page);                   \
        const struct cl_page_slice *__scan;                             \
        int                         __result;                           \
        ptrdiff_t                   __op   = (_op);                     \
        int                       (*__method)_proto;                    \
                                                                        \
        __result = 0;                                                   \
        list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
                __method = *(void **)((char *)__scan->cpl_ops + __op);  \
                if (__method) {                                         \
                        __result = (*__method)(__env, __scan, ## __VA_ARGS__); \
                        if (__result != 0)                              \
                                break;                                  \
                }                                                       \
        }                                                               \
        if (__result > 0)                                               \
                __result = 0;                                           \
        __result;                                                       \
})

#define CL_PAGE_INVOID(_env, _page, _op, _proto, ...)                   \
do {                                                                    \
        const struct lu_env        *__env  = (_env);                    \
        struct cl_page             *__page = (_page);                   \
        const struct cl_page_slice *__scan;                             \
        ptrdiff_t                   __op   = (_op);                     \
        void                      (*__method)_proto;                    \
                                                                        \
        list_for_each_entry(__scan, &__page->cp_layers, cpl_linkage) {  \
                __method = *(void **)((char *)__scan->cpl_ops + __op);  \
                if (__method)                                           \
                        (*__method)(__env, __scan, ## __VA_ARGS__);     \
        }                                                               \
} while (0)

#define CL_PAGE_INVOID_REVERSE(_env, _page, _op, _proto, ...)           \
do {                                                                    \
        const struct lu_env        *__env  = (_env);                    \
        struct cl_page             *__page = (_page);                   \
        const struct cl_page_slice *__scan;                             \
        ptrdiff_t                   __op   = (_op);                     \
        void                      (*__method)_proto;                    \
                                                                        \
        list_for_each_entry_reverse(__scan, &__page->cp_layers, cpl_linkage) { \
                __method = *(void **)((char *)__scan->cpl_ops + __op);  \
                if (__method)                                           \
                        (*__method)(__env, __scan, ## __VA_ARGS__);     \
        }                                                               \
} while (0)

static int cl_page_invoke(const struct lu_env *env,
                          struct cl_io *io, struct cl_page *page, ptrdiff_t op)

{
        PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
        return CL_PAGE_INVOKE(env, page, op,
                              (const struct lu_env *,
                               const struct cl_page_slice *, struct cl_io *),
                              io);
}

static void cl_page_invoid(const struct lu_env *env,
                           struct cl_io *io, struct cl_page *page, ptrdiff_t op)

{
        PINVRNT(env, page, cl_object_same(page->cp_obj, io->ci_obj));
        CL_PAGE_INVOID(env, page, op,
                       (const struct lu_env *,
                        const struct cl_page_slice *, struct cl_io *), io);
}

static void cl_page_owner_clear(struct cl_page *page)
{
        if (page->cp_owner) {
                LASSERT(page->cp_owner->ci_owned_nr > 0);
                page->cp_owner->ci_owned_nr--;
                page->cp_owner = NULL;
        }
}

static void cl_page_owner_set(struct cl_page *page)
{
        page->cp_owner->ci_owned_nr++;
}

void cl_page_disown0(const struct lu_env *env,
                     struct cl_io *io, struct cl_page *pg)
{
        enum cl_page_state state;

        state = pg->cp_state;
        PINVRNT(env, pg, state == CPS_OWNED || state == CPS_FREEING);
        PINVRNT(env, pg, cl_page_invariant(pg) || state == CPS_FREEING);
        cl_page_owner_clear(pg);

        if (state == CPS_OWNED)
                cl_page_state_set(env, pg, CPS_CACHED);
        /*
         * Completion call-backs are executed in the bottom-up order, so that
         * uppermost layer (llite), responsible for VFS/VM interaction runs
         * last and can release locks safely.
         */
        CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_disown),
                               (const struct lu_env *,
                                const struct cl_page_slice *, struct cl_io *),
                               io);
}

/**
 * Returns true iff the page is owned by the given io.
 */
int cl_page_is_owned(const struct cl_page *pg, const struct cl_io *io)
{
        struct cl_io *top = cl_io_top((struct cl_io *)io);

        LINVRNT(cl_object_same(pg->cp_obj, io->ci_obj));
        return pg->cp_state == CPS_OWNED && pg->cp_owner == top;
}
EXPORT_SYMBOL(cl_page_is_owned);

/**
 * Try to own a page by IO.
 *
 * Waits until the page is in cl_page_state::CPS_CACHED state, and then
 * switches it into cl_page_state::CPS_OWNED state.
 *
 * \pre  !cl_page_is_owned(pg, io)
 * \post result == 0 iff cl_page_is_owned(pg, io)
 *
 * \retval 0   success
 *
 * \retval -ve failure, e.g., the page was destroyed (and landed in
 *           cl_page_state::CPS_FREEING instead of cl_page_state::CPS_CACHED),
 *           or the page was owned by another thread, or in IO.
 *
 * \see cl_page_disown()
 * \see cl_page_operations::cpo_own()
 * \see cl_page_own_try()
 * \see cl_page_own
 */
static int cl_page_own0(const struct lu_env *env, struct cl_io *io,
                        struct cl_page *pg, int nonblock)
{
        int result;

        PINVRNT(env, pg, !cl_page_is_owned(pg, io));

        io = cl_io_top(io);

        if (pg->cp_state == CPS_FREEING) {
                result = -ENOENT;
        } else {
                result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(cpo_own),
                                        (const struct lu_env *,
                                         const struct cl_page_slice *,
                                         struct cl_io *, int),
                                        io, nonblock);
                if (result == 0) {
                        PASSERT(env, pg, !pg->cp_owner);
                        pg->cp_owner = cl_io_top(io);
                        cl_page_owner_set(pg);
                        if (pg->cp_state != CPS_FREEING) {
                                cl_page_state_set(env, pg, CPS_OWNED);
                        } else {
                                cl_page_disown0(env, io, pg);
                                result = -ENOENT;
                        }
                }
        }
        PINVRNT(env, pg, ergo(result == 0, cl_page_invariant(pg)));
        return result;
}

/**
 * Own a page; may block.
 *
 * \see cl_page_own0()
 */
int cl_page_own(const struct lu_env *env, struct cl_io *io, struct cl_page *pg)
{
        return cl_page_own0(env, io, pg, 0);
}
EXPORT_SYMBOL(cl_page_own);

/**
 * Nonblocking version of cl_page_own().
 *
 * \see cl_page_own0()
 */
int cl_page_own_try(const struct lu_env *env, struct cl_io *io,
                    struct cl_page *pg)
{
        return cl_page_own0(env, io, pg, 1);
}
EXPORT_SYMBOL(cl_page_own_try);

/**
 * Assume page ownership.
 *
 * Called when page is already locked by the hosting VM.
 *
 * \pre !cl_page_is_owned(pg, io)
 * \post cl_page_is_owned(pg, io)
 *
 * \see cl_page_operations::cpo_assume()
 */
void cl_page_assume(const struct lu_env *env,
                    struct cl_io *io, struct cl_page *pg)
{
        PINVRNT(env, pg, cl_object_same(pg->cp_obj, io->ci_obj));

        io = cl_io_top(io);

        cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_assume));
        PASSERT(env, pg, !pg->cp_owner);
        pg->cp_owner = cl_io_top(io);
        cl_page_owner_set(pg);
        cl_page_state_set(env, pg, CPS_OWNED);
}
EXPORT_SYMBOL(cl_page_assume);

/**
 * Releases page ownership without unlocking the page.
 *
 * Moves page into cl_page_state::CPS_CACHED without releasing a lock on the
 * underlying VM page (as VM is supposed to do this itself).
 *
 * \pre   cl_page_is_owned(pg, io)
 * \post !cl_page_is_owned(pg, io)
 *
 * \see cl_page_assume()
 */
void cl_page_unassume(const struct lu_env *env,
                      struct cl_io *io, struct cl_page *pg)
{
        PINVRNT(env, pg, cl_page_is_owned(pg, io));
        PINVRNT(env, pg, cl_page_invariant(pg));

        io = cl_io_top(io);
        cl_page_owner_clear(pg);
        cl_page_state_set(env, pg, CPS_CACHED);
        CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_unassume),
                               (const struct lu_env *,
                                const struct cl_page_slice *, struct cl_io *),
                               io);
}
EXPORT_SYMBOL(cl_page_unassume);

/**
 * Releases page ownership.
 *
 * Moves page into cl_page_state::CPS_CACHED.
 *
 * \pre   cl_page_is_owned(pg, io)
 * \post !cl_page_is_owned(pg, io)
 *
 * \see cl_page_own()
 * \see cl_page_operations::cpo_disown()
 */
void cl_page_disown(const struct lu_env *env,
                    struct cl_io *io, struct cl_page *pg)
{
        PINVRNT(env, pg, cl_page_is_owned(pg, io) ||
                pg->cp_state == CPS_FREEING);

        io = cl_io_top(io);
        cl_page_disown0(env, io, pg);
}
EXPORT_SYMBOL(cl_page_disown);

/**
 * Called when page is to be removed from the object, e.g., as a result of
 * truncate.
 *
 * Calls cl_page_operations::cpo_discard() top-to-bottom.
 *
 * \pre cl_page_is_owned(pg, io)
 *
 * \see cl_page_operations::cpo_discard()
 */
void cl_page_discard(const struct lu_env *env,
                     struct cl_io *io, struct cl_page *pg)
{
        PINVRNT(env, pg, cl_page_is_owned(pg, io));
        PINVRNT(env, pg, cl_page_invariant(pg));

        cl_page_invoid(env, io, pg, CL_PAGE_OP(cpo_discard));
}
EXPORT_SYMBOL(cl_page_discard);

/**
 * Version of cl_page_delete() that can be called for not fully constructed
 * pages, e.g., in an error handling cl_page_find()->cl_page_delete0()
 * path. Doesn't check page invariant.
 */
static void cl_page_delete0(const struct lu_env *env, struct cl_page *pg)
{
        PASSERT(env, pg, pg->cp_state != CPS_FREEING);

        /*
         * Sever all ways to obtain new pointers to @pg.
         */
        cl_page_owner_clear(pg);

        cl_page_state_set0(env, pg, CPS_FREEING);

        CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(cpo_delete),
                               (const struct lu_env *,
                                const struct cl_page_slice *));
}

/**
 * Called when a decision is made to throw page out of memory.
 *
 * Notifies all layers about page destruction by calling
 * cl_page_operations::cpo_delete() method top-to-bottom.
 *
 * Moves page into cl_page_state::CPS_FREEING state (this is the only place
 * where transition to this state happens).
 *
 * Eliminates all venues through which new references to the page can be
 * obtained:
 *
 *     - removes page from the radix trees,
 *
 *     - breaks linkage from VM page to cl_page.
 *
 * Once page reaches cl_page_state::CPS_FREEING, all remaining references will
 * drain after some time, at which point page will be recycled.
 *
 * \pre  VM page is locked
 * \post pg->cp_state == CPS_FREEING
 *
 * \see cl_page_operations::cpo_delete()
 */
void cl_page_delete(const struct lu_env *env, struct cl_page *pg)
{
        PINVRNT(env, pg, cl_page_invariant(pg));
        cl_page_delete0(env, pg);
}
EXPORT_SYMBOL(cl_page_delete);

/**
 * Marks page up-to-date.
 *
 * Call cl_page_operations::cpo_export() through all layers top-to-bottom. The
 * layer responsible for VM interaction has to mark/clear page as up-to-date
 * by the \a uptodate argument.
 *
 * \see cl_page_operations::cpo_export()
 */
void cl_page_export(const struct lu_env *env, struct cl_page *pg, int uptodate)
{
        PINVRNT(env, pg, cl_page_invariant(pg));
        CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_export),
                       (const struct lu_env *,
                        const struct cl_page_slice *, int), uptodate);
}
EXPORT_SYMBOL(cl_page_export);

/**
 * Returns true, iff \a pg is VM locked in a suitable sense by the calling
 * thread.
 */
int cl_page_is_vmlocked(const struct lu_env *env, const struct cl_page *pg)
{
        int result;
        const struct cl_page_slice *slice;

        slice = container_of(pg->cp_layers.next,
                             const struct cl_page_slice, cpl_linkage);
        PASSERT(env, pg, slice->cpl_ops->cpo_is_vmlocked);
        /*
         * Call ->cpo_is_vmlocked() directly instead of going through
         * CL_PAGE_INVOKE(), because cl_page_is_vmlocked() is used by
         * cl_page_invariant().
         */
        result = slice->cpl_ops->cpo_is_vmlocked(env, slice);
        PASSERT(env, pg, result == -EBUSY || result == -ENODATA);
        return result == -EBUSY;
}
EXPORT_SYMBOL(cl_page_is_vmlocked);

static enum cl_page_state cl_req_type_state(enum cl_req_type crt)
{
        return crt == CRT_WRITE ? CPS_PAGEOUT : CPS_PAGEIN;
}

static void cl_page_io_start(const struct lu_env *env,
                             struct cl_page *pg, enum cl_req_type crt)
{
        /*
         * Page is queued for IO, change its state.
         */
        cl_page_owner_clear(pg);
        cl_page_state_set(env, pg, cl_req_type_state(crt));
}

/**
 * Prepares page for immediate transfer. cl_page_operations::cpo_prep() is
 * called top-to-bottom. Every layer either agrees to submit this page (by
 * returning 0), or requests to omit this page (by returning -EALREADY). Layer
 * handling interactions with the VM also has to inform VM that page is under
 * transfer now.
 */
int cl_page_prep(const struct lu_env *env, struct cl_io *io,
                 struct cl_page *pg, enum cl_req_type crt)
{
        int result;

        PINVRNT(env, pg, cl_page_is_owned(pg, io));
        PINVRNT(env, pg, cl_page_invariant(pg));
        PINVRNT(env, pg, crt < CRT_NR);

        /*
         * XXX this has to be called bottom-to-top, so that llite can set up
         * PG_writeback without risking other layers deciding to skip this
         * page.
         */
        if (crt >= CRT_NR)
                return -EINVAL;
        result = cl_page_invoke(env, io, pg, CL_PAGE_OP(io[crt].cpo_prep));
        if (result == 0)
                cl_page_io_start(env, pg, crt);

        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
        return result;
}
EXPORT_SYMBOL(cl_page_prep);

/**
 * Notify layers about transfer completion.
 *
 * Invoked by the transfer sub-system (which is a part of osc) to notify
 * layers that a transfer, of which this page is a part, has completed.
 *
 * Completion call-backs are executed in the bottom-up order, so that
 * uppermost layer (llite), responsible for the VFS/VM interaction runs last
 * and can release locks safely.
 *
 * \pre  pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
 * \post pg->cp_state == CPS_CACHED
 *
 * \see cl_page_operations::cpo_completion()
 */
void cl_page_completion(const struct lu_env *env,
                        struct cl_page *pg, enum cl_req_type crt, int ioret)
{
        struct cl_sync_io *anchor = pg->cp_sync_io;

        PASSERT(env, pg, crt < CRT_NR);
        PASSERT(env, pg, pg->cp_state == cl_req_type_state(crt));

        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, ioret);

        cl_page_state_set(env, pg, CPS_CACHED);
        if (crt >= CRT_NR)
                return;
        CL_PAGE_INVOID_REVERSE(env, pg, CL_PAGE_OP(io[crt].cpo_completion),
                               (const struct lu_env *,
                                const struct cl_page_slice *, int), ioret);
        if (anchor) {
                LASSERT(pg->cp_sync_io == anchor);
                pg->cp_sync_io = NULL;
                cl_sync_io_note(env, anchor, ioret);
        }
}
EXPORT_SYMBOL(cl_page_completion);

/**
 * Notify layers that transfer formation engine decided to yank this page from
 * the cache and to make it a part of a transfer.
 *
 * \pre  pg->cp_state == CPS_CACHED
 * \post pg->cp_state == CPS_PAGEIN || pg->cp_state == CPS_PAGEOUT
 *
 * \see cl_page_operations::cpo_make_ready()
 */
int cl_page_make_ready(const struct lu_env *env, struct cl_page *pg,
                       enum cl_req_type crt)
{
        int result;

        PINVRNT(env, pg, crt < CRT_NR);

        if (crt >= CRT_NR)
                return -EINVAL;
        result = CL_PAGE_INVOKE(env, pg, CL_PAGE_OP(io[crt].cpo_make_ready),
                                (const struct lu_env *,
                                 const struct cl_page_slice *));
        if (result == 0) {
                PASSERT(env, pg, pg->cp_state == CPS_CACHED);
                cl_page_io_start(env, pg, crt);
        }
        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", crt, result);
        return result;
}
EXPORT_SYMBOL(cl_page_make_ready);
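
/*
 * Illustrative transfer flow (not used in this file): an opportunistic
 * write-out of a cached page driven by the transfer engine would look
 * roughly like this, assuming "env", "pg" and the completion status "ioret"
 * are supplied by the caller:
 *
 *      if (cl_page_make_ready(env, pg, CRT_WRITE) == 0) {
 *              ... page is now in CPS_PAGEOUT; submit it to the server ...
 *              cl_page_completion(env, pg, CRT_WRITE, ioret);
 *              ... page is back in CPS_CACHED ...
 *      }
 */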

/**
 * Called when a page is being written back at the kernel's initiative.
 *
 * \pre  cl_page_is_owned(pg, io)
 * \post ergo(result == 0, pg->cp_state == CPS_PAGEOUT)
 *
 * \see cl_page_operations::cpo_flush()
 */
int cl_page_flush(const struct lu_env *env, struct cl_io *io,
                  struct cl_page *pg)
{
        int result;

        PINVRNT(env, pg, cl_page_is_owned(pg, io));
        PINVRNT(env, pg, cl_page_invariant(pg));

        result = cl_page_invoke(env, io, pg, CL_PAGE_OP(cpo_flush));

        CL_PAGE_HEADER(D_TRACE, env, pg, "%d\n", result);
        return result;
}
EXPORT_SYMBOL(cl_page_flush);

/**
 * Tells transfer engine that only part of a page is to be transmitted.
 *
 * \see cl_page_operations::cpo_clip()
 */
void cl_page_clip(const struct lu_env *env, struct cl_page *pg,
                  int from, int to)
{
        PINVRNT(env, pg, cl_page_invariant(pg));

        CL_PAGE_HEADER(D_TRACE, env, pg, "%d %d\n", from, to);
        CL_PAGE_INVOID(env, pg, CL_PAGE_OP(cpo_clip),
                       (const struct lu_env *,
                        const struct cl_page_slice *, int, int),
                       from, to);
}
EXPORT_SYMBOL(cl_page_clip);

/**
 * Prints a human-readable representation of the \a pg header via \a printer.
 */
void cl_page_header_print(const struct lu_env *env, void *cookie,
                          lu_printer_t printer, const struct cl_page *pg)
{
        (*printer)(env, cookie,
                   "page@%p[%d %p %d %d %p]\n",
                   pg, atomic_read(&pg->cp_ref), pg->cp_obj,
                   pg->cp_state, pg->cp_type,
                   pg->cp_owner);
}
EXPORT_SYMBOL(cl_page_header_print);

/**
 * Prints a human-readable representation of \a pg via \a printer.
 */
void cl_page_print(const struct lu_env *env, void *cookie,
                   lu_printer_t printer, const struct cl_page *pg)
{
        cl_page_header_print(env, cookie, printer, pg);
        CL_PAGE_INVOKE(env, (struct cl_page *)pg, CL_PAGE_OP(cpo_print),
                       (const struct lu_env *env,
                        const struct cl_page_slice *slice,
                        void *cookie, lu_printer_t p), cookie, printer);
        (*printer)(env, cookie, "end page@%p\n", pg);
}
EXPORT_SYMBOL(cl_page_print);

/**
 * Cancel a page which is still in a transfer.
 */
int cl_page_cancel(const struct lu_env *env, struct cl_page *page)
{
        return CL_PAGE_INVOKE(env, page, CL_PAGE_OP(cpo_cancel),
                              (const struct lu_env *,
                               const struct cl_page_slice *));
}

/**
 * Converts a page index within object \a obj into a byte offset.
 */
loff_t cl_offset(const struct cl_object *obj, pgoff_t idx)
{
        /*
         * XXX for now.
         */
        return (loff_t)idx << PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_offset);

/**
 * Converts a byte offset within object \a obj into a page index.
 */
pgoff_t cl_index(const struct cl_object *obj, loff_t offset)
{
        /*
         * XXX for now.
         */
        return offset >> PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_index);

size_t cl_page_size(const struct cl_object *obj)
{
        return 1UL << PAGE_SHIFT;
}
EXPORT_SYMBOL(cl_page_size);

/**
 * Adds page slice to the compound page.
 *
 * This is called by cl_object_operations::coo_page_init() methods to add a
 * per-layer state to the page. New state is added at the end of
 * cl_page::cp_layers list, that is, it is at the bottom of the stack.
 *
 * \see cl_lock_slice_add(), cl_req_slice_add(), cl_io_slice_add()
 */
void cl_page_slice_add(struct cl_page *page, struct cl_page_slice *slice,
                       struct cl_object *obj, pgoff_t index,
                       const struct cl_page_operations *ops)
{
        list_add_tail(&slice->cpl_linkage, &page->cp_layers);
        slice->cpl_obj  = obj;
        slice->cpl_index = index;
        slice->cpl_ops  = ops;
        slice->cpl_page = page;
}
EXPORT_SYMBOL(cl_page_slice_add);
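
/*
 * Illustrative layer-side usage (a hypothetical layer "foo", not part of
 * this file): a cl_object_operations::coo_page_init() method embeds a
 * cl_page_slice in its own per-page structure and registers it here:
 *
 *      static int foo_page_init(const struct lu_env *env,
 *                               struct cl_object *obj,
 *                               struct cl_page *page, pgoff_t index)
 *      {
 *              struct foo_page *fp = ...;  (layer-private per-page state)
 *
 *              cl_page_slice_add(page, &fp->fp_cl, obj, index,
 *                                &foo_page_ops);
 *              return 0;
 *      }
 */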

/**
 * Allocate and initialize cl_cache, called by ll_init_sbi().
 */
struct cl_client_cache *cl_cache_init(unsigned long lru_page_max)
{
        struct cl_client_cache  *cache = NULL;

        cache = kzalloc(sizeof(*cache), GFP_KERNEL);
        if (!cache)
                return NULL;

        /* Initialize cache data */
        atomic_set(&cache->ccc_users, 1);
        cache->ccc_lru_max = lru_page_max;
        atomic_long_set(&cache->ccc_lru_left, lru_page_max);
        spin_lock_init(&cache->ccc_lru_lock);
        INIT_LIST_HEAD(&cache->ccc_lru);

        atomic_long_set(&cache->ccc_unstable_nr, 0);
        init_waitqueue_head(&cache->ccc_unstable_waitq);

        return cache;
}
EXPORT_SYMBOL(cl_cache_init);

/**
 * Increase cl_cache refcount
 */
void cl_cache_incref(struct cl_client_cache *cache)
{
        atomic_inc(&cache->ccc_users);
}
EXPORT_SYMBOL(cl_cache_incref);

/**
 * Decrease the cl_cache refcount and free the cache if the refcount reaches
 * zero. Since llite, lov and osc all hold a cl_cache reference, the free
 * will not cause a race. (LU-6173)
 */
void cl_cache_decref(struct cl_client_cache *cache)
{
        if (atomic_dec_and_test(&cache->ccc_users))
                kfree(cache);
}
EXPORT_SYMBOL(cl_cache_decref);
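
/*
 * Illustrative life cycle of the client cache managed by the three functions
 * above (not part of this file); "lru_page_max" is chosen by the caller, as
 * ll_init_sbi() does:
 *
 *      struct cl_client_cache *cache;
 *
 *      cache = cl_cache_init(lru_page_max);
 *      if (!cache)
 *              return -ENOMEM;
 *      cl_cache_incref(cache);         (an extra user, e.g. lov or osc)
 *      ...
 *      cl_cache_decref(cache);         (the extra user drops its reference)
 *      cl_cache_decref(cache);         (last reference frees the cache)
 */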