/*
 * Unix SMB/CIFS implementation.
 * threadpool implementation based on pthreads
 * Copyright (C) Volker Lendecke 2009,2011
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "replace.h"
#include "system/select.h"
#include "system/threads.h"
#include "system/filesys.h"
#include "pthreadpool_tevent.h"
#include "pthreadpool.h"
#include "lib/util/tevent_unix.h"
#include "lib/util/dlinklist.h"
#include "lib/util/attr.h"

/*
 * We try to give some hints to helgrind/drd
 *
 * Note that ANNOTATE_BENIGN_RACE_SIZED(address, size, description)
 * takes a memory address range that is ignored by helgrind/drd;
 * 'description' is just ignored...
 *
 * Note that ANNOTATE_HAPPENS_*(unique_uintptr)
 * just takes a DWORD/(void *) as unique key
 * for the barrier.
 */
#ifdef HAVE_VALGRIND_HELGRIND_H
#include <valgrind/helgrind.h>
#endif
#ifndef ANNOTATE_BENIGN_RACE_SIZED
#define ANNOTATE_BENIGN_RACE_SIZED(address, size, description)
#endif
#ifndef ANNOTATE_HAPPENS_BEFORE
#define ANNOTATE_HAPPENS_BEFORE(unique_uintptr)
#endif
#ifndef ANNOTATE_HAPPENS_AFTER
#define ANNOTATE_HAPPENS_AFTER(unique_uintptr)
#endif
#ifndef ANNOTATE_HAPPENS_BEFORE_FORGET_ALL
#define ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(unique_uintptr)
#endif

#define PTHREAD_TEVENT_JOB_THREAD_FENCE_INIT(__job) do { \
	_UNUSED_ const struct pthreadpool_tevent_job *__j = __job; \
	ANNOTATE_BENIGN_RACE_SIZED(&__j->needs_fence, \
				   sizeof(__j->needs_fence), \
				   "race by design, protected by fence"); \
} while(0);

#ifdef WITH_PTHREADPOOL
/*
 * configure checked we have pthread and atomic_thread_fence() available
 */
#define __PTHREAD_TEVENT_JOB_THREAD_FENCE(__order) do { \
	atomic_thread_fence(__order); \
} while(0)
#else
/*
 * we're using lib/pthreadpool/pthreadpool_sync.c ...
 */
#define __PTHREAD_TEVENT_JOB_THREAD_FENCE(__order) do { } while(0)
#endif /* WITH_PTHREADPOOL */

#define PTHREAD_TEVENT_JOB_THREAD_FENCE(__job) do { \
	_UNUSED_ const struct pthreadpool_tevent_job *__j = __job; \
	ANNOTATE_HAPPENS_BEFORE(&__j->needs_fence); \
	__PTHREAD_TEVENT_JOB_THREAD_FENCE(memory_order_seq_cst); \
	ANNOTATE_HAPPENS_AFTER(&__j->needs_fence); \
} while(0);

#define PTHREAD_TEVENT_JOB_THREAD_FENCE_FINI(__job) do { \
	_UNUSED_ const struct pthreadpool_tevent_job *__j = __job; \
	ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&__j->needs_fence); \
} while(0);

struct pthreadpool_tevent_job_state;

/*
 * We need one pthreadpool_tevent_glue object per unique combination of tevent
 * contexts and pthreadpool_tevent objects. Maintain a list of used tevent
 * contexts in a pthreadpool_tevent.
 */
struct pthreadpool_tevent_glue {
	struct pthreadpool_tevent_glue *prev, *next;
	struct pthreadpool_tevent *pool; /* back-pointer to owning object. */
	/* Tuple we are keeping track of in this list. */
	struct tevent_context *ev;
	struct tevent_threaded_context *tctx;
	/* Pointer to link object owned by *ev. */
	struct pthreadpool_tevent_glue_ev_link *ev_link;
	/* active jobs */
	struct pthreadpool_tevent_job_state *states;
};

/*
 * The pthreadpool_tevent_glue_ev_link and its destructor ensure we remove the
 * tevent context from our list of active event contexts if the event context
 * is destroyed.
 * This structure is talloc()'ed from the struct tevent_context *, and is a
 * back-pointer allowing the related struct pthreadpool_tevent_glue object
 * to be removed from the struct pthreadpool_tevent glue list if the owning
 * tevent_context is talloc_free()'ed.
 */
struct pthreadpool_tevent_glue_ev_link {
	struct pthreadpool_tevent_glue *glue;
};

struct pthreadpool_tevent {
	struct pthreadpool *pool;
	struct pthreadpool_tevent_glue *glue_list;

	struct pthreadpool_tevent_job *jobs;
};

struct pthreadpool_tevent_job_state {
	struct pthreadpool_tevent_job_state *prev, *next;
	struct pthreadpool_tevent_glue *glue;
	struct tevent_context *ev;
	struct tevent_req *req;
	struct pthreadpool_tevent_job *job;
};

struct pthreadpool_tevent_job {
	struct pthreadpool_tevent_job *prev, *next;

	struct pthreadpool_tevent *pool;
	struct pthreadpool_tevent_job_state *state;
	struct tevent_immediate *im;

	void (*fn)(void *private_data);
	void *private_data;

	/*
	 * Coordination between threads
	 *
	 * There's only one side writing each element,
	 * either the main process or the job thread.
	 *
	 * The coordination is done by a full memory
	 * barrier using atomic_thread_fence(memory_order_seq_cst)
	 * wrapped in PTHREAD_TEVENT_JOB_THREAD_FENCE().
	 */
	struct {
		/* set when tevent_req_cancel() is called
		 * (only written by main thread!) */
		bool maycancel;

		/* set when talloc_free is called on the job request,
		 * tevent_context or pthreadpool_tevent
		 * (only written by main thread!) */
		bool orphaned;

		/* set when the job is picked up by a worker thread
		 * (only written by job thread!) */
		bool started;

		/* set once the job function returned
		 * (only written by job thread!) */
		bool executed;

		/* set when pthreadpool_tevent_job_signal() is entered
		 * (only written by job thread!) */
		bool finished;

		/* set when pthreadpool_tevent_job_signal() leaves with
		 * orphaned already set
		 * (only written by job thread!) */
		bool dropped;

		/* set when pthreadpool_tevent_job_signal() leaves normally
		 * and the immediate event was scheduled
		 * (only written by job thread!) */
		bool signaled;
	} needs_fence;
};

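/*
 * Illustrative sketch (not part of the original code): the intended
 * write/read pairing around PTHREAD_TEVENT_JOB_THREAD_FENCE(). Each
 * flag above has a single writer; the fence provides the
 * happens-before edge between writer and reader, mirroring what
 * pthreadpool_tevent_job_fn() and pthreadpool_tevent_job_done()
 * actually do with 'executed':
 *
 *	// job thread: write the flag, then fence
 *	job->needs_fence.executed = true;
 *	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
 *
 *	// main thread: fence, then read the flag
 *	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
 *	if (job->needs_fence.executed) {
 *		tevent_req_done(state->req);
 *	}
 */
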
static int pthreadpool_tevent_destructor(struct pthreadpool_tevent *pool);

static void pthreadpool_tevent_job_orphan(struct pthreadpool_tevent_job *job);

static struct pthreadpool_tevent_job *orphaned_jobs;

void pthreadpool_tevent_cleanup_orphaned_jobs(void)
{
	struct pthreadpool_tevent_job *job = NULL;
	struct pthreadpool_tevent_job *njob = NULL;

	for (job = orphaned_jobs; job != NULL; job = njob) {
		njob = job->next;

		/*
		 * The job destructor keeps the job alive
		 * (and in the list) or removes it from the list.
		 */
		TALLOC_FREE(job);
	}
}

static int pthreadpool_tevent_job_signal(int jobid,
					 void (*job_fn)(void *private_data),
					 void *job_private_data,
					 void *private_data);

int pthreadpool_tevent_init(TALLOC_CTX *mem_ctx, unsigned max_threads,
			    struct pthreadpool_tevent **presult)
{
	struct pthreadpool_tevent *pool;
	int ret;

	pthreadpool_tevent_cleanup_orphaned_jobs();

	pool = talloc_zero(mem_ctx, struct pthreadpool_tevent);
	if (pool == NULL) {
		return ENOMEM;
	}

	ret = pthreadpool_init(max_threads, &pool->pool,
			       pthreadpool_tevent_job_signal, pool);
	if (ret != 0) {
		TALLOC_FREE(pool);
		return ret;
	}

	talloc_set_destructor(pool, pthreadpool_tevent_destructor);

	*presult = pool;
	return 0;
}

size_t pthreadpool_tevent_max_threads(struct pthreadpool_tevent *pool)
{
	if (pool->pool == NULL) {
		return 0;
	}

	return pthreadpool_max_threads(pool->pool);
}

size_t pthreadpool_tevent_queued_jobs(struct pthreadpool_tevent *pool)
{
	if (pool->pool == NULL) {
		return 0;
	}

	return pthreadpool_queued_jobs(pool->pool);
}

static int pthreadpool_tevent_destructor(struct pthreadpool_tevent *pool)
{
	struct pthreadpool_tevent_job *job = NULL;
	struct pthreadpool_tevent_job *njob = NULL;
	struct pthreadpool_tevent_glue *glue = NULL;
	int ret;

	ret = pthreadpool_stop(pool->pool);
	if (ret != 0) {
		return ret;
	}

	for (job = pool->jobs; job != NULL; job = njob) {
		njob = job->next;

		/* The job orphan call removes it from the list */
		pthreadpool_tevent_job_orphan(job);
	}

	/*
	 * Delete all the registered
	 * tevent_context/tevent_threaded_context
	 * pairs.
	 */
	for (glue = pool->glue_list; glue != NULL; glue = pool->glue_list) {
		/* The glue destructor removes it from the list */
		TALLOC_FREE(glue);
	}
	pool->glue_list = NULL;

	ret = pthreadpool_destroy(pool->pool);
	if (ret != 0) {
		return ret;
	}
	pool->pool = NULL;

	pthreadpool_tevent_cleanup_orphaned_jobs();

	return 0;
}

static int pthreadpool_tevent_glue_destructor(
	struct pthreadpool_tevent_glue *glue)
{
	struct pthreadpool_tevent_job_state *state = NULL;
	struct pthreadpool_tevent_job_state *nstate = NULL;

	for (state = glue->states; state != NULL; state = nstate) {
		nstate = state->next;

		/* The job orphan call removes it from the list */
		pthreadpool_tevent_job_orphan(state->job);
	}

	if (glue->pool->glue_list != NULL) {
		DLIST_REMOVE(glue->pool->glue_list, glue);
	}

	/* Ensure the ev_link destructor knows we're gone */
	glue->ev_link->glue = NULL;

	TALLOC_FREE(glue->ev_link);
	TALLOC_FREE(glue->tctx);

	return 0;
}

/*
 * Destructor called either explicitly from
 * pthreadpool_tevent_glue_destructor(), or indirectly
 * when the owning tevent_context is destroyed.
 *
 * When called from pthreadpool_tevent_glue_destructor()
 * ev_link->glue is already NULL, so this does nothing.
 *
 * When called from talloc_free() of the owning
 * tevent_context we must ensure we also remove the
 * linked glue object from the list inside
 * struct pthreadpool_tevent.
 */
static int pthreadpool_tevent_glue_link_destructor(
	struct pthreadpool_tevent_glue_ev_link *ev_link)
{
	TALLOC_FREE(ev_link->glue);
	return 0;
}

static int pthreadpool_tevent_register_ev(
	struct pthreadpool_tevent *pool,
	struct pthreadpool_tevent_job_state *state)
{
	struct tevent_context *ev = state->ev;
	struct pthreadpool_tevent_glue *glue = NULL;
	struct pthreadpool_tevent_glue_ev_link *ev_link = NULL;

	/*
	 * See if this tevent_context was already registered by
	 * searching the glue object list. If so we have nothing
	 * to do here - we already have a tevent_context/tevent_threaded_context
	 * pair.
	 */
	for (glue = pool->glue_list; glue != NULL; glue = glue->next) {
		if (glue->ev == state->ev) {
			state->glue = glue;
			DLIST_ADD_END(glue->states, state);
			return 0;
		}
	}

	/*
	 * Event context not yet registered - create a new glue
	 * object containing a tevent_context/tevent_threaded_context
	 * pair and put it on the list to remember this registration.
	 * We also need a link object to ensure the event context
	 * can't go away without us knowing about it.
	 */
	glue = talloc_zero(pool, struct pthreadpool_tevent_glue);
	if (glue == NULL) {
		return ENOMEM;
	}
	*glue = (struct pthreadpool_tevent_glue) {
		.pool = pool,
		.ev = ev,
	};
	talloc_set_destructor(glue, pthreadpool_tevent_glue_destructor);

	/*
	 * Now allocate the link object to the event context. Note this
	 * is allocated OFF THE EVENT CONTEXT ITSELF, so if the event
	 * context is freed we are able to cleanup the glue object
	 * in the link object destructor.
	 */
	ev_link = talloc_zero(ev, struct pthreadpool_tevent_glue_ev_link);
	if (ev_link == NULL) {
		TALLOC_FREE(glue);
		return ENOMEM;
	}
	ev_link->glue = glue;
	talloc_set_destructor(ev_link, pthreadpool_tevent_glue_link_destructor);

	glue->ev_link = ev_link;

#ifdef HAVE_PTHREAD
	glue->tctx = tevent_threaded_context_create(glue, ev);
	if (glue->tctx == NULL) {
		TALLOC_FREE(ev_link);
		TALLOC_FREE(glue);
		return ENOMEM;
	}
#endif

	state->glue = glue;
	DLIST_ADD_END(glue->states, state);

	DLIST_ADD(pool->glue_list, glue);
	return 0;
}

static void pthreadpool_tevent_job_fn(void *private_data);
static void pthreadpool_tevent_job_done(struct tevent_context *ctx,
					struct tevent_immediate *im,
					void *private_data);
static bool pthreadpool_tevent_job_cancel(struct tevent_req *req);

static int pthreadpool_tevent_job_destructor(struct pthreadpool_tevent_job *job)
{
	/*
	 * We should never be called with needs_fence.orphaned == false.
	 * Only pthreadpool_tevent_job_orphan() will call TALLOC_FREE(job)
	 * after detaching from the request state, glue and pool list.
	 */
	if (!job->needs_fence.orphaned) {
		abort();
	}

	/*
	 * If the job is not finished (job->im still there)
	 * and it's still attached to the pool,
	 * we try to cancel it (before it has started).
	 */
	if (job->im != NULL && job->pool != NULL) {
		size_t num;

		num = pthreadpool_cancel_job(job->pool->pool, 0,
					     pthreadpool_tevent_job_fn,
					     job);
		if (num != 0) {
			/*
			 * It was not too late to cancel the request.
			 *
			 * We can remove job->im, as it will never be used.
			 */
			TALLOC_FREE(job->im);
		}
	}

	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	if (job->needs_fence.dropped) {
		/*
		 * The signal function saw job->needs_fence.orphaned
		 * before it started the signaling via the immediate
		 * event. So we'll never get triggered and can
		 * remove job->im and let the whole job go...
		 */
		TALLOC_FREE(job->im);
	}

	/*
	 * pthreadpool_tevent_job_orphan() already removed
	 * it from pool->jobs. And we don't need to try
	 * pthreadpool_cancel_job() again.
	 */
	job->pool = NULL;

	if (job->im != NULL) {
		/*
		 * job->im still being there means we need to wait for the
		 * immediate event to be triggered or just leak the memory.
		 *
		 * Move it to the orphaned list, if it's not already there.
		 */
		if (job->prev == NULL && job->next == NULL &&
		    orphaned_jobs != job) {
			DLIST_ADD_END(orphaned_jobs, job);
		}
		return -1;
	}

	/*
	 * Finally remove from the orphaned_jobs list
	 * and let talloc destroy us.
	 */
	DLIST_REMOVE(orphaned_jobs, job);

	PTHREAD_TEVENT_JOB_THREAD_FENCE_FINI(job);
	return 0;
}

static void pthreadpool_tevent_job_orphan(struct pthreadpool_tevent_job *job)
{
	job->needs_fence.orphaned = true;
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);

	/*
	 * We're the only function that sets
	 * job->state = NULL;
	 */
	if (job->state == NULL) {
		abort();
	}

	/*
	 * Once we've marked the request as 'orphaned'
	 * we spin/loop if it's already marked
	 * as 'finished' (which means that
	 * pthreadpool_tevent_job_signal() was entered).
	 * If it saw 'orphaned' it will exit after setting
	 * 'dropped', otherwise it dereferences
	 * job->state->glue->{tctx,ev} until it exits
	 * after setting 'signaled'.
	 *
	 * We need to close this potential gap before
	 * we can set job->state = NULL.
	 *
	 * This is some kind of spinlock, but with
	 * 1 millisecond sleeps in between, in order
	 * to give the thread more cpu time to finish.
	 */
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	while (job->needs_fence.finished) {
		if (job->needs_fence.dropped) {
			break;
		}
		if (job->needs_fence.signaled) {
			break;
		}
		poll(NULL, 0, 1);
		PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	}

	/*
	 * Once the gap is closed, we can remove
	 * the state from the glue's list.
	 */
	DLIST_REMOVE(job->state->glue->states, job->state);
	job->state->glue = NULL;

	/*
	 * We need to reparent to a long term context.
	 * And detach from the request state.
	 * Maybe the destructor will keep the memory
	 * and leak it for now.
	 */
	(void)talloc_reparent(job->state, NULL, job);
	job->state->job = NULL;
	job->state = NULL;

	/*
	 * job->pool will only be set to NULL
	 * in the first destructor run.
	 */
	if (job->pool == NULL) {
		abort();
	}

	/*
	 * Detach it from the pool.
	 *
	 * The job might still be running,
	 * so we keep job->pool.
	 * The destructor will set it to NULL
	 * after trying pthreadpool_cancel_job().
	 */
	DLIST_REMOVE(job->pool->jobs, job);

	/*
	 * Add it to the list of orphaned jobs,
	 * which may be cleaned up later.
	 *
	 * The destructor removes it from the list
	 * when possible, or it denies the free
	 * and keeps it in the list.
	 */
	DLIST_ADD_END(orphaned_jobs, job);
	TALLOC_FREE(job);
}

static void pthreadpool_tevent_job_cleanup(struct tevent_req *req,
					   enum tevent_req_state req_state)
{
	struct pthreadpool_tevent_job_state *state =
		tevent_req_data(req,
		struct pthreadpool_tevent_job_state);

	if (state->job == NULL) {
		/*
		 * The job request is not scheduled in the pool
		 * yet or anymore.
		 */
		if (state->glue != NULL) {
			DLIST_REMOVE(state->glue->states, state);
			state->glue = NULL;
		}
		return;
	}

	/*
	 * We need to reparent to a long term context.
	 * Maybe the destructor will keep the memory
	 * and leak it for now.
	 */
	pthreadpool_tevent_job_orphan(state->job);
	state->job = NULL; /* not needed but looks better */
	return;
}

struct tevent_req *pthreadpool_tevent_job_send(
	TALLOC_CTX *mem_ctx, struct tevent_context *ev,
	struct pthreadpool_tevent *pool,
	void (*fn)(void *private_data), void *private_data)
{
	struct tevent_req *req = NULL;
	struct pthreadpool_tevent_job_state *state = NULL;
	struct pthreadpool_tevent_job *job = NULL;
	int ret;

	pthreadpool_tevent_cleanup_orphaned_jobs();

	req = tevent_req_create(mem_ctx, &state,
				struct pthreadpool_tevent_job_state);
	if (req == NULL) {
		return NULL;
	}
	state->ev = ev;
	state->req = req;

	tevent_req_set_cleanup_fn(req, pthreadpool_tevent_job_cleanup);

	if (pool == NULL) {
		tevent_req_error(req, EINVAL);
		return tevent_req_post(req, ev);
	}
	if (pool->pool == NULL) {
		tevent_req_error(req, EINVAL);
		return tevent_req_post(req, ev);
	}

	ret = pthreadpool_tevent_register_ev(pool, state);
	if (tevent_req_error(req, ret)) {
		return tevent_req_post(req, ev);
	}

	job = talloc_zero(state, struct pthreadpool_tevent_job);
	if (tevent_req_nomem(job, req)) {
		return tevent_req_post(req, ev);
	}
	job->pool = pool;
	job->fn = fn;
	job->private_data = private_data;
	job->im = tevent_create_immediate(state->job);
	if (tevent_req_nomem(job->im, req)) {
		return tevent_req_post(req, ev);
	}
	PTHREAD_TEVENT_JOB_THREAD_FENCE_INIT(job);
	talloc_set_destructor(job, pthreadpool_tevent_job_destructor);
	DLIST_ADD_END(job->pool->jobs, job);
	job->state = state;
	state->job = job;

	ret = pthreadpool_add_job(job->pool->pool, 0,
				  pthreadpool_tevent_job_fn,
				  job);
	if (tevent_req_error(req, ret)) {
		return tevent_req_post(req, ev);
	}

	tevent_req_set_cancel_fn(req, pthreadpool_tevent_job_cancel);
	return req;
}

static __thread struct pthreadpool_tevent_job *current_job;

bool pthreadpool_tevent_current_job_canceled(void)
{
	if (current_job == NULL) {
		/*
		 * Should only be called from within
		 * the job function.
		 */
		abort();
		return false;
	}

	PTHREAD_TEVENT_JOB_THREAD_FENCE(current_job);
	return current_job->needs_fence.maycancel;
}

bool pthreadpool_tevent_current_job_orphaned(void)
{
	if (current_job == NULL) {
		/*
		 * Should only be called from within
		 * the job function.
		 */
		abort();
		return false;
	}

	PTHREAD_TEVENT_JOB_THREAD_FENCE(current_job);
	return current_job->needs_fence.orphaned;
}

bool pthreadpool_tevent_current_job_continue(void)
{
	if (current_job == NULL) {
		/*
		 * Should only be called from within
		 * the job function.
		 */
		abort();
		return false;
	}

	PTHREAD_TEVENT_JOB_THREAD_FENCE(current_job);
	if (current_job->needs_fence.maycancel) {
		return false;
	}
	PTHREAD_TEVENT_JOB_THREAD_FENCE(current_job);
	if (current_job->needs_fence.orphaned) {
		return false;
	}

	return true;
}

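/*
 * Illustrative sketch (not part of the original code): a long-running
 * job function would poll pthreadpool_tevent_current_job_continue()
 * between work items and bail out once the request was canceled or
 * orphaned. 'my_job_fn', 'my_chunk_fn' and 'struct my_state' are
 * hypothetical names for this example only:
 *
 *	static void my_job_fn(void *private_data)
 *	{
 *		struct my_state *ms = (struct my_state *)private_data;
 *		size_t i;
 *
 *		for (i = 0; i < ms->num_chunks; i++) {
 *			if (!pthreadpool_tevent_current_job_continue()) {
 *				return;
 *			}
 *			my_chunk_fn(ms, i);
 *		}
 *	}
 */
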
static void pthreadpool_tevent_job_fn(void *private_data)
{
	struct pthreadpool_tevent_job *job =
		talloc_get_type_abort(private_data,
		struct pthreadpool_tevent_job);

	current_job = job;
	job->needs_fence.started = true;
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);

	job->fn(job->private_data);

	job->needs_fence.executed = true;
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	current_job = NULL;
}

static int pthreadpool_tevent_job_signal(int jobid,
					 void (*job_fn)(void *private_data),
					 void *job_private_data,
					 void *private_data)
{
	struct pthreadpool_tevent_job *job =
		talloc_get_type_abort(job_private_data,
		struct pthreadpool_tevent_job);

	job->needs_fence.finished = true;
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	if (job->needs_fence.orphaned) {
		/* Request already gone */
		job->needs_fence.dropped = true;
		PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
		return 0;
	}

	/*
	 * state and state->glue are valid,
	 * see the job->needs_fence.finished
	 * "spinlock" loop in
	 * pthreadpool_tevent_job_orphan()
	 */
	if (job->state->glue->tctx != NULL) {
		/* with HAVE_PTHREAD */
		tevent_threaded_schedule_immediate(job->state->glue->tctx,
						   job->im,
						   pthreadpool_tevent_job_done,
						   job);
	} else {
		/* without HAVE_PTHREAD */
		tevent_schedule_immediate(job->im,
					  job->state->glue->ev,
					  pthreadpool_tevent_job_done,
					  job);
	}

	job->needs_fence.signaled = true;
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	return 0;
}

static void pthreadpool_tevent_job_done(struct tevent_context *ctx,
					struct tevent_immediate *im,
					void *private_data)
{
	struct pthreadpool_tevent_job *job =
		talloc_get_type_abort(private_data,
		struct pthreadpool_tevent_job);
	struct pthreadpool_tevent_job_state *state = job->state;

	TALLOC_FREE(job->im);

	if (state == NULL) {
		/* Request already gone */
		TALLOC_FREE(job);
		return;
	}

	/*
	 * pthreadpool_tevent_job_cleanup()
	 * (called by tevent_req_done() or
	 * tevent_req_error()) will destroy the job.
	 */

	if (job->needs_fence.executed) {
		tevent_req_done(state->req);
		return;
	}

	tevent_req_error(state->req, ENOEXEC);
	return;
}

static bool pthreadpool_tevent_job_cancel(struct tevent_req *req)
{
	struct pthreadpool_tevent_job_state *state =
		tevent_req_data(req,
		struct pthreadpool_tevent_job_state);
	struct pthreadpool_tevent_job *job = state->job;
	size_t num;

	if (job == NULL) {
		return false;
	}

	job->needs_fence.maycancel = true;
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	if (job->needs_fence.started) {
		/*
		 * It was too late to cancel the request.
		 *
		 * The job still has the chance to look
		 * at pthreadpool_tevent_current_job_canceled()
		 * or pthreadpool_tevent_current_job_continue().
		 */
		return false;
	}

	num = pthreadpool_cancel_job(job->pool->pool, 0,
				     pthreadpool_tevent_job_fn,
				     job);
	if (num == 0) {
		/*
		 * It was too late to cancel the request.
		 */
		return false;
	}

	/*
	 * It was not too late to cancel the request.
	 *
	 * We can remove job->im, as it will never be used.
	 */
	TALLOC_FREE(job->im);

	/*
	 * pthreadpool_tevent_job_cleanup()
	 * will destroy the job.
	 */
	tevent_req_defer_callback(req, state->ev);
	tevent_req_error(req, ECANCELED);
	return true;
}

int pthreadpool_tevent_job_recv(struct tevent_req *req)
{
	return tevent_req_simple_recv_unix(req);
}
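
/*
 * Illustrative sketch (not part of the original code): typical caller
 * usage of the send/recv API above, driven by a synchronous
 * tevent_req_poll() wait loop. 'my_compute_fn', 'run_one_job' and the
 * thread count are assumptions for the example; tevent_req_poll()
 * comes from tevent:
 *
 *	static void my_compute_fn(void *private_data)
 *	{
 *		// runs in a worker thread;
 *		// must only touch private_data
 *	}
 *
 *	static int run_one_job(TALLOC_CTX *mem_ctx,
 *			       struct tevent_context *ev)
 *	{
 *		struct pthreadpool_tevent *pool = NULL;
 *		struct tevent_req *req = NULL;
 *		int ret;
 *
 *		ret = pthreadpool_tevent_init(mem_ctx, 4, &pool);
 *		if (ret != 0) {
 *			return ret;
 *		}
 *
 *		req = pthreadpool_tevent_job_send(mem_ctx, ev, pool,
 *						  my_compute_fn, NULL);
 *		if (req == NULL) {
 *			return ENOMEM;
 *		}
 *		if (!tevent_req_poll(req, ev)) {
 *			return errno;
 *		}
 *
 *		ret = pthreadpool_tevent_job_recv(req);
 *		TALLOC_FREE(req);
 *		return ret;
 *	}
 */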