/*
 * Unix SMB/CIFS implementation.
 * threadpool implementation based on pthreads
 * Copyright (C) Volker Lendecke 2009,2011
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "replace.h"
#include "system/select.h"
#include "system/threads.h"
#include "pthreadpool_tevent.h"
#include "pthreadpool.h"
#include "lib/util/tevent_unix.h"
#include "lib/util/dlinklist.h"
#include "lib/util/attr.h"

/*
 * We try to give some hints to helgrind/drd
 *
 * Note that ANNOTATE_BENIGN_RACE_SIZED(address, size, description)
 * takes a memory address range that is ignored by helgrind/drd;
 * 'description' is just ignored...
 *
 * Note that ANNOTATE_HAPPENS_*(unique_uintptr)
 * just takes a DWORD/(void *) as unique key
 * for the barrier.
 */
#ifdef HAVE_VALGRIND_HELGRIND_H
#include <valgrind/helgrind.h>
#endif
#ifndef ANNOTATE_BENIGN_RACE_SIZED
#define ANNOTATE_BENIGN_RACE_SIZED(address, size, description)
#endif
#ifndef ANNOTATE_HAPPENS_BEFORE
#define ANNOTATE_HAPPENS_BEFORE(unique_uintptr)
#endif
#ifndef ANNOTATE_HAPPENS_AFTER
#define ANNOTATE_HAPPENS_AFTER(unique_uintptr)
#endif
#ifndef ANNOTATE_HAPPENS_BEFORE_FORGET_ALL
#define ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(unique_uintptr)
#endif

#define PTHREAD_TEVENT_JOB_THREAD_FENCE_INIT(__job) do { \
	_UNUSED_ const struct pthreadpool_tevent_job *__j = __job; \
	ANNOTATE_BENIGN_RACE_SIZED(&__j->needs_fence, \
				   sizeof(__j->needs_fence), \
				   "race by design, protected by fence"); \
} while(0)

#ifdef WITH_PTHREADPOOL
/*
 * configure checked we have pthread and atomic_thread_fence() available
 */
#define __PTHREAD_TEVENT_JOB_THREAD_FENCE(__order) do { \
	atomic_thread_fence(__order); \
} while(0)
#else
/*
 * we're using lib/pthreadpool/pthreadpool_sync.c ...
 */
#define __PTHREAD_TEVENT_JOB_THREAD_FENCE(__order) do { } while(0)
#ifndef HAVE___THREAD
#define __thread
#endif
#endif

#define PTHREAD_TEVENT_JOB_THREAD_FENCE(__job) do { \
	_UNUSED_ const struct pthreadpool_tevent_job *__j = __job; \
	ANNOTATE_HAPPENS_BEFORE(&__j->needs_fence); \
	__PTHREAD_TEVENT_JOB_THREAD_FENCE(memory_order_seq_cst); \
	ANNOTATE_HAPPENS_AFTER(&__j->needs_fence); \
} while(0)

#define PTHREAD_TEVENT_JOB_THREAD_FENCE_FINI(__job) do { \
	_UNUSED_ const struct pthreadpool_tevent_job *__j = __job; \
	ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&__j->needs_fence); \
} while(0)

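/*
 * A minimal sketch of how the fence macro is meant to be used
 * (illustrative only; the flag names come from struct
 * pthreadpool_tevent_job below):
 *
 *	job thread:
 *		job->needs_fence.executed = true;
 *		PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
 *
 *	main thread:
 *		PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
 *		if (job->needs_fence.executed) { ... }
 *
 * The seq_cst fence on both sides ensures that once the reader sees
 * a flag, it also sees every write the other thread did before
 * setting it.
 */
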
struct pthreadpool_tevent_job_state;

/*
 * We need one pthreadpool_tevent_glue object per unique combination of tevent
 * contexts and pthreadpool_tevent objects. Maintain a list of used tevent
 * contexts in a pthreadpool_tevent.
 */
struct pthreadpool_tevent_glue {
	struct pthreadpool_tevent_glue *prev, *next;
	struct pthreadpool_tevent *pool; /* back-pointer to owning object. */
	/* Tuple we are keeping track of in this list. */
	struct tevent_context *ev;
	struct tevent_threaded_context *tctx;
	/* Pointer to link object owned by *ev. */
	struct pthreadpool_tevent_glue_ev_link *ev_link;
	/* States of the jobs scheduled on this ev/pool pair. */
	struct pthreadpool_tevent_job_state *states;
};

/*
 * The pthreadpool_tevent_glue_ev_link and its destructor ensure we remove the
 * tevent context from our list of active event contexts if the event context
 * is destroyed.
 * This structure is talloc()'ed from the struct tevent_context *, and is a
 * back-pointer allowing the related struct pthreadpool_tevent_glue object
 * to be removed from the struct pthreadpool_tevent glue list if the owning
 * tevent_context is talloc_free()'ed.
 */
struct pthreadpool_tevent_glue_ev_link {
	struct pthreadpool_tevent_glue *glue;
};

struct pthreadpool_tevent {
	struct pthreadpool *pool;
	struct pthreadpool_tevent_glue *glue_list;

	struct pthreadpool_tevent_job *jobs;
};

struct pthreadpool_tevent_job_state {
	struct pthreadpool_tevent_job_state *prev, *next;
	struct pthreadpool_tevent_glue *glue;
	struct tevent_context *ev;
	struct tevent_req *req;
	struct pthreadpool_tevent_job *job;
};

struct pthreadpool_tevent_job {
	struct pthreadpool_tevent_job *prev, *next;

	struct pthreadpool_tevent *pool;
	struct pthreadpool_tevent_job_state *state;
	struct tevent_immediate *im;

	void (*fn)(void *private_data);
	void *private_data;

	/*
	 * Coordination between threads
	 *
	 * There's only one side writing each element,
	 * either the main process or the job thread.
	 *
	 * The coordination is done by a full memory
	 * barrier using atomic_thread_fence(memory_order_seq_cst)
	 * wrapped in PTHREAD_TEVENT_JOB_THREAD_FENCE()
	 */
	struct {
		/*
		 * 'maycancel'
		 * set when tevent_req_cancel() is called.
		 * (only written by main thread!)
		 */
		bool maycancel;

		/*
		 * 'orphaned'
		 * set when talloc_free is called on the job request,
		 * tevent_context or pthreadpool_tevent.
		 * (only written by main thread!)
		 */
		bool orphaned;

		/*
		 * 'started'
		 * set when the job is picked up by a worker thread
		 * (only written by job thread!)
		 */
		bool started;

		/*
		 * 'executed'
		 * set once the job function returned.
		 * (only written by job thread!)
		 */
		bool executed;

		/*
		 * 'finished'
		 * set when pthreadpool_tevent_job_signal() is entered
		 * (only written by job thread!)
		 */
		bool finished;

		/*
		 * 'dropped'
		 * set when pthreadpool_tevent_job_signal() leaves with
		 * orphaned already set.
		 * (only written by job thread!)
		 */
		bool dropped;

		/*
		 * 'signaled'
		 * set when pthreadpool_tevent_job_signal() leaves normally
		 * and the immediate event was scheduled.
		 * (only written by job thread!)
		 */
		bool signaled;
	} needs_fence;
};

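/*
 * A rough timeline of the needs_fence flags for a job that completes
 * normally (no cancellation, nothing freed early); a sketch, not an
 * exhaustive state machine:
 *
 *	main thread:	pthreadpool_tevent_job_send()
 *	job thread:	started = true; fn(private_data); executed = true;
 *	job thread:	finished = true; (immediate scheduled) signaled = true;
 *	main thread:	pthreadpool_tevent_job_done() -> tevent_req_done()
 */
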
static int pthreadpool_tevent_destructor(struct pthreadpool_tevent *pool);

static void pthreadpool_tevent_job_orphan(struct pthreadpool_tevent_job *job);

static struct pthreadpool_tevent_job *orphaned_jobs;

void pthreadpool_tevent_cleanup_orphaned_jobs(void)
{
	struct pthreadpool_tevent_job *job = NULL;
	struct pthreadpool_tevent_job *njob = NULL;

	for (job = orphaned_jobs; job != NULL; job = njob) {
		njob = job->next;

		/*
		 * The job destructor keeps the job alive
		 * (and in the list) or removes it from the list.
		 */
		TALLOC_FREE(job);
	}
}

static int pthreadpool_tevent_job_signal(int jobid,
					 void (*job_fn)(void *private_data),
					 void *job_private_data,
					 void *private_data);

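/*
 * Example usage (a sketch; error handling abbreviated, and mem_ctx is
 * a caller-provided talloc context):
 *
 *	struct pthreadpool_tevent *pool = NULL;
 *	int ret;
 *
 *	ret = pthreadpool_tevent_init(mem_ctx, 4, &pool);
 *	if (ret != 0) {
 *		return ret;
 *	}
 *
 * Freeing the pool orphans any remaining jobs, see
 * pthreadpool_tevent_destructor() below.
 */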
int pthreadpool_tevent_init(TALLOC_CTX *mem_ctx, unsigned max_threads,
			    struct pthreadpool_tevent **presult)
{
	struct pthreadpool_tevent *pool;
	int ret;

	pthreadpool_tevent_cleanup_orphaned_jobs();

	pool = talloc_zero(mem_ctx, struct pthreadpool_tevent);
	if (pool == NULL) {
		return ENOMEM;
	}

	ret = pthreadpool_init(max_threads, &pool->pool,
			       pthreadpool_tevent_job_signal, pool);
	if (ret != 0) {
		TALLOC_FREE(pool);
		return ret;
	}

	talloc_set_destructor(pool, pthreadpool_tevent_destructor);

	*presult = pool;
	return 0;
}

size_t pthreadpool_tevent_max_threads(struct pthreadpool_tevent *pool)
{
	if (pool->pool == NULL) {
		return 0;
	}

	return pthreadpool_max_threads(pool->pool);
}

size_t pthreadpool_tevent_queued_jobs(struct pthreadpool_tevent *pool)
{
	if (pool->pool == NULL) {
		return 0;
	}

	return pthreadpool_queued_jobs(pool->pool);
}

static int pthreadpool_tevent_destructor(struct pthreadpool_tevent *pool)
{
	struct pthreadpool_tevent_job *job = NULL;
	struct pthreadpool_tevent_job *njob = NULL;
	struct pthreadpool_tevent_glue *glue = NULL;
	int ret;

	ret = pthreadpool_stop(pool->pool);
	if (ret != 0) {
		return ret;
	}

	for (job = pool->jobs; job != NULL; job = njob) {
		njob = job->next;

		/* pthreadpool_tevent_job_orphan() removes it from the list */
		pthreadpool_tevent_job_orphan(job);
	}

	/*
	 * Delete all the registered
	 * tevent_context/tevent_threaded_context
	 * pairs.
	 */
	for (glue = pool->glue_list; glue != NULL; glue = pool->glue_list) {
		/* The glue destructor removes it from the list */
		TALLOC_FREE(glue);
	}
	pool->glue_list = NULL;

	ret = pthreadpool_destroy(pool->pool);
	if (ret != 0) {
		return ret;
	}
	pool->pool = NULL;

	pthreadpool_tevent_cleanup_orphaned_jobs();

	return 0;
}

static int pthreadpool_tevent_glue_destructor(
	struct pthreadpool_tevent_glue *glue)
{
	struct pthreadpool_tevent_job_state *state = NULL;
	struct pthreadpool_tevent_job_state *nstate = NULL;

	for (state = glue->states; state != NULL; state = nstate) {
		nstate = state->next;

		/* pthreadpool_tevent_job_orphan() removes it from the list */
		pthreadpool_tevent_job_orphan(state->job);
	}

	if (glue->pool->glue_list != NULL) {
		DLIST_REMOVE(glue->pool->glue_list, glue);
	}

	/* Ensure the ev_link destructor knows we're gone */
	glue->ev_link->glue = NULL;

	TALLOC_FREE(glue->ev_link);
	TALLOC_FREE(glue->tctx);

	return 0;
}

/*
 * Destructor called either explicitly from
 * pthreadpool_tevent_glue_destructor(), or indirectly
 * when the owning tevent_context is destroyed.
 *
 * When called from pthreadpool_tevent_glue_destructor()
 * ev_link->glue is already NULL, so this does nothing.
 *
 * When called from talloc_free() of the owning
 * tevent_context we must ensure we also remove the
 * linked glue object from the list inside
 * struct pthreadpool_tevent.
 */
static int pthreadpool_tevent_glue_link_destructor(
	struct pthreadpool_tevent_glue_ev_link *ev_link)
{
	TALLOC_FREE(ev_link->glue);
	return 0;
}

static int pthreadpool_tevent_register_ev(
	struct pthreadpool_tevent *pool,
	struct pthreadpool_tevent_job_state *state)
{
	struct tevent_context *ev = state->ev;
	struct pthreadpool_tevent_glue *glue = NULL;
	struct pthreadpool_tevent_glue_ev_link *ev_link = NULL;

	/*
	 * See if this tevent_context was already registered by
	 * searching the glue object list. If so we have nothing
	 * to do here - we already have a tevent_context/tevent_threaded_context
	 * pair.
	 */
	for (glue = pool->glue_list; glue != NULL; glue = glue->next) {
		if (glue->ev == state->ev) {
			state->glue = glue;
			DLIST_ADD_END(glue->states, state);
			return 0;
		}
	}

	/*
	 * Event context not yet registered - create a new glue
	 * object containing a tevent_context/tevent_threaded_context
	 * pair and put it on the list to remember this registration.
	 * We also need a link object to ensure the event context
	 * can't go away without us knowing about it.
	 */
	glue = talloc_zero(pool, struct pthreadpool_tevent_glue);
	if (glue == NULL) {
		return ENOMEM;
	}
	*glue = (struct pthreadpool_tevent_glue) {
		.pool = pool,
		.ev = ev,
	};
	talloc_set_destructor(glue, pthreadpool_tevent_glue_destructor);

	/*
	 * Now allocate the link object to the event context. Note this
	 * is allocated OFF THE EVENT CONTEXT ITSELF, so if the event
	 * context is freed we are able to cleanup the glue object
	 * in the link object destructor.
	 */
	ev_link = talloc_zero(ev, struct pthreadpool_tevent_glue_ev_link);
	if (ev_link == NULL) {
		TALLOC_FREE(glue);
		return ENOMEM;
	}
	ev_link->glue = glue;
	talloc_set_destructor(ev_link, pthreadpool_tevent_glue_link_destructor);

	glue->ev_link = ev_link;

#ifdef HAVE_PTHREAD
	glue->tctx = tevent_threaded_context_create(glue, ev);
	if (glue->tctx == NULL) {
		TALLOC_FREE(ev_link);
		TALLOC_FREE(glue);
		return ENOMEM;
	}
#endif

	state->glue = glue;
	DLIST_ADD_END(glue->states, state);

	DLIST_ADD(pool->glue_list, glue);
	return 0;
}

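/*
 * Ownership sketch for the glue objects created above (a summary of
 * the talloc hierarchy in this file, not additional API):
 *
 *	pool (talloc) -> glue -> tctx
 *	ev   (talloc) -> ev_link
 *	glue <-> ev_link        (raw back-pointers)
 *
 * Whichever of pool or ev is freed first runs its destructor, which
 * clears the back-pointer, so the surviving destructor becomes a
 * no-op instead of touching freed memory.
 */
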
static void pthreadpool_tevent_job_fn(void *private_data);
static void pthreadpool_tevent_job_done(struct tevent_context *ctx,
					struct tevent_immediate *im,
					void *private_data);
static bool pthreadpool_tevent_job_cancel(struct tevent_req *req);

static int pthreadpool_tevent_job_destructor(struct pthreadpool_tevent_job *job)
{
	/*
	 * We should never be called with needs_fence.orphaned == false.
	 * Only pthreadpool_tevent_job_orphan() will call TALLOC_FREE(job)
	 * after detaching from the request state, glue and pool list.
	 */
	if (!job->needs_fence.orphaned) {
		abort();
	}

	/*
	 * If the job is not finished (job->im still there)
	 * and it's still attached to the pool,
	 * we try to cancel it (before it has started).
	 */
	if (job->im != NULL && job->pool != NULL) {
		size_t num;

		num = pthreadpool_cancel_job(job->pool->pool, 0,
					     pthreadpool_tevent_job_fn,
					     job);
		if (num != 0) {
			/*
			 * It was not too late to cancel the request.
			 *
			 * We can remove job->im, as it will never be used.
			 */
			TALLOC_FREE(job->im);
		}
	}

	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	if (job->needs_fence.dropped) {
		/*
		 * The signal function saw job->needs_fence.orphaned
		 * before it started the signaling via the immediate
		 * event. So we'll never get triggered and can
		 * remove job->im and let the whole job go...
		 */
		TALLOC_FREE(job->im);
	}

	/*
	 * pthreadpool_tevent_job_orphan() already removed
	 * the job from pool->jobs. And we don't need to try
	 * pthreadpool_cancel_job() again.
	 */
	job->pool = NULL;

	if (job->im != NULL) {
		/*
		 * job->im still being there means we need to wait for
		 * the immediate event to be triggered or just leak the
		 * memory.
		 *
		 * The job stays on the orphaned list; returning -1
		 * denies the talloc_free().
		 */
		return -1;
	}

	/*
	 * Finally remove from the orphaned_jobs list
	 * and let talloc destroy us.
	 */
	DLIST_REMOVE(orphaned_jobs, job);

	PTHREAD_TEVENT_JOB_THREAD_FENCE_FINI(job);
	return 0;
}

static void pthreadpool_tevent_job_orphan(struct pthreadpool_tevent_job *job)
{
	job->needs_fence.orphaned = true;
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);

	/*
	 * We're the only function that sets
	 * job->state = NULL;
	 */
	if (job->state == NULL) {
		abort();
	}

	/*
	 * Once we marked the request as 'orphaned'
	 * we spin/loop if it's already marked
	 * as 'finished' (which means that
	 * pthreadpool_tevent_job_signal() was entered).
	 * If it saw 'orphaned' it will exit after setting
	 * 'dropped', otherwise it dereferences
	 * job->state->glue->{tctx,ev} until it exits
	 * after setting 'signaled'.
	 *
	 * We need to close this potential gap before
	 * we can set job->state = NULL.
	 *
	 * This is some kind of spinlock, but with
	 * 1 millisecond sleeps in between, in order
	 * to give the thread more cpu time to finish.
	 */
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	while (job->needs_fence.finished) {
		if (job->needs_fence.dropped) {
			break;
		}
		if (job->needs_fence.signaled) {
			break;
		}
		poll(NULL, 0, 1);
		PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	}

	/*
	 * Once the gap is closed, we can remove
	 * the state from the glue's list.
	 */
	DLIST_REMOVE(job->state->glue->states, job->state);
	job->state->glue = NULL;

	/*
	 * We need to reparent to a long term context.
	 * And detach from the request state.
	 * Maybe the destructor will keep the memory
	 * and leak it for now.
	 */
	(void)talloc_reparent(job->state, NULL, job);
	job->state->job = NULL;
	job->state = NULL;

	/*
	 * job->pool will only be set to NULL
	 * in the first destructor run.
	 */
	if (job->pool == NULL) {
		abort();
	}

	/*
	 * Detach it from the pool.
	 *
	 * The job might still be running,
	 * so we keep job->pool.
	 * The destructor will set it to NULL
	 * after trying pthreadpool_cancel_job().
	 */
	DLIST_REMOVE(job->pool->jobs, job);

	/*
	 * Add it to the list of orphaned jobs,
	 * which may be cleaned up later.
	 *
	 * The destructor removes it from the list
	 * when possible or it denies the free
	 * and keeps it in the list.
	 */
	DLIST_ADD_END(orphaned_jobs, job);
	TALLOC_FREE(job);
}

static void pthreadpool_tevent_job_cleanup(struct tevent_req *req,
					   enum tevent_req_state req_state)
{
	struct pthreadpool_tevent_job_state *state =
		tevent_req_data(req,
		struct pthreadpool_tevent_job_state);

	if (state->job == NULL) {
		/*
		 * The job request is not scheduled in the pool
		 * yet or anymore.
		 */
		if (state->glue != NULL) {
			DLIST_REMOVE(state->glue->states, state);
			state->glue = NULL;
		}
		return;
	}

	/*
	 * We need to reparent to a long term context.
	 * Maybe the destructor will keep the memory
	 * and leak it for now.
	 */
	pthreadpool_tevent_job_orphan(state->job);
	state->job = NULL; /* not needed but looks better */
	return;
}

struct tevent_req *pthreadpool_tevent_job_send(
	TALLOC_CTX *mem_ctx, struct tevent_context *ev,
	struct pthreadpool_tevent *pool,
	void (*fn)(void *private_data), void *private_data)
{
	struct tevent_req *req = NULL;
	struct pthreadpool_tevent_job_state *state = NULL;
	struct pthreadpool_tevent_job *job = NULL;
	int ret;

	pthreadpool_tevent_cleanup_orphaned_jobs();

	req = tevent_req_create(mem_ctx, &state,
				struct pthreadpool_tevent_job_state);
	if (req == NULL) {
		return NULL;
	}
	state->ev = ev;
	state->req = req;

	tevent_req_set_cleanup_fn(req, pthreadpool_tevent_job_cleanup);

	if (pool == NULL) {
		tevent_req_error(req, EINVAL);
		return tevent_req_post(req, ev);
	}
	if (pool->pool == NULL) {
		tevent_req_error(req, EINVAL);
		return tevent_req_post(req, ev);
	}

	ret = pthreadpool_tevent_register_ev(pool, state);
	if (tevent_req_error(req, ret)) {
		return tevent_req_post(req, ev);
	}

	job = talloc_zero(state, struct pthreadpool_tevent_job);
	if (tevent_req_nomem(job, req)) {
		return tevent_req_post(req, ev);
	}
	job->pool = pool;
	job->fn = fn;
	job->private_data = private_data;
	job->im = tevent_create_immediate(state->job);
	if (tevent_req_nomem(job->im, req)) {
		return tevent_req_post(req, ev);
	}
	PTHREAD_TEVENT_JOB_THREAD_FENCE_INIT(job);
	talloc_set_destructor(job, pthreadpool_tevent_job_destructor);
	DLIST_ADD_END(job->pool->jobs, job);
	state->job = job;
	job->state = state;

	ret = pthreadpool_add_job(job->pool->pool, 0,
				  pthreadpool_tevent_job_fn,
				  job);
	if (tevent_req_error(req, ret)) {
		return tevent_req_post(req, ev);
	}

	tevent_req_set_cancel_fn(req, pthreadpool_tevent_job_cancel);
	return req;
}

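/*
 * Example usage of the async API (a sketch; my_job_fn, my_data and
 * the surrounding event loop are caller-provided and hypothetical):
 *
 *	req = pthreadpool_tevent_job_send(mem_ctx, ev, pool,
 *					  my_job_fn, my_data);
 *	if (req == NULL) {
 *		return ENOMEM;
 *	}
 *	... run the tevent loop until the request is done ...
 *	ret = pthreadpool_tevent_job_recv(req);
 *	TALLOC_FREE(req);
 *
 * my_job_fn() runs in a worker thread, so it must not touch the
 * tevent_context or any other unlocked main-thread state.
 */
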
static __thread struct pthreadpool_tevent_job *current_job;

bool pthreadpool_tevent_current_job_canceled(void)
{
	if (current_job == NULL) {
		/*
		 * Should only be called from within
		 * the job function.
		 */
		abort();
		return false;
	}

	PTHREAD_TEVENT_JOB_THREAD_FENCE(current_job);
	return current_job->needs_fence.maycancel;
}

bool pthreadpool_tevent_current_job_orphaned(void)
{
	if (current_job == NULL) {
		/*
		 * Should only be called from within
		 * the job function.
		 */
		abort();
		return false;
	}

	PTHREAD_TEVENT_JOB_THREAD_FENCE(current_job);
	return current_job->needs_fence.orphaned;
}

bool pthreadpool_tevent_current_job_continue(void)
{
	if (current_job == NULL) {
		/*
		 * Should only be called from within
		 * the job function.
		 */
		abort();
		return false;
	}

	PTHREAD_TEVENT_JOB_THREAD_FENCE(current_job);
	if (current_job->needs_fence.maycancel) {
		return false;
	}
	PTHREAD_TEVENT_JOB_THREAD_FENCE(current_job);
	if (current_job->needs_fence.orphaned) {
		return false;
	}

	return true;
}

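/*
 * A minimal sketch of a cooperative job function using the helper
 * above (my_process_chunk() and struct my_state are hypothetical):
 *
 *	static void my_job_fn(void *private_data)
 *	{
 *		struct my_state *s = private_data;
 *
 *		while (s->remaining > 0) {
 *			if (!pthreadpool_tevent_current_job_continue()) {
 *				return;
 *			}
 *			my_process_chunk(s);
 *		}
 *	}
 */
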
static void pthreadpool_tevent_job_fn(void *private_data)
{
	struct pthreadpool_tevent_job *job =
		talloc_get_type_abort(private_data,
		struct pthreadpool_tevent_job);

	current_job = job;
	job->needs_fence.started = true;
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);

	job->fn(job->private_data);

	job->needs_fence.executed = true;
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	current_job = NULL;
}

static int pthreadpool_tevent_job_signal(int jobid,
					 void (*job_fn)(void *private_data),
					 void *job_private_data,
					 void *private_data)
{
	struct pthreadpool_tevent_job *job =
		talloc_get_type_abort(job_private_data,
		struct pthreadpool_tevent_job);

	job->needs_fence.finished = true;
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	if (job->needs_fence.orphaned) {
		/* Request already gone */
		job->needs_fence.dropped = true;
		PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
		return 0;
	}

	/*
	 * state and state->glue are valid,
	 * see the job->needs_fence.finished
	 * "spinlock" loop in
	 * pthreadpool_tevent_job_orphan()
	 */
	if (job->state->glue->tctx != NULL) {
		/* with HAVE_PTHREAD */
		tevent_threaded_schedule_immediate(job->state->glue->tctx,
						   job->im,
						   pthreadpool_tevent_job_done,
						   job);
	} else {
		/* without HAVE_PTHREAD */
		tevent_schedule_immediate(job->im,
					  job->state->glue->ev,
					  pthreadpool_tevent_job_done,
					  job);
	}

	job->needs_fence.signaled = true;
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	return 0;
}

static void pthreadpool_tevent_job_done(struct tevent_context *ctx,
					struct tevent_immediate *im,
					void *private_data)
{
	struct pthreadpool_tevent_job *job =
		talloc_get_type_abort(private_data,
		struct pthreadpool_tevent_job);
	struct pthreadpool_tevent_job_state *state = job->state;

	TALLOC_FREE(job->im);

	if (state == NULL) {
		/* Request already gone */
		TALLOC_FREE(job);
		return;
	}

	/*
	 * pthreadpool_tevent_job_cleanup()
	 * (called by tevent_req_done() or
	 * tevent_req_error()) will destroy the job.
	 */

	if (job->needs_fence.executed) {
		tevent_req_done(state->req);
		return;
	}

	tevent_req_error(state->req, ENOEXEC);
	return;
}

static bool pthreadpool_tevent_job_cancel(struct tevent_req *req)
{
	struct pthreadpool_tevent_job_state *state =
		tevent_req_data(req,
		struct pthreadpool_tevent_job_state);
	struct pthreadpool_tevent_job *job = state->job;
	size_t num;

	if (job == NULL) {
		return false;
	}

	job->needs_fence.maycancel = true;
	PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
	if (job->needs_fence.started) {
		/*
		 * It was too late to cancel the request.
		 *
		 * The job still has the chance to look
		 * at pthreadpool_tevent_current_job_canceled()
		 * or pthreadpool_tevent_current_job_continue()
		 */
		return false;
	}

	num = pthreadpool_cancel_job(job->pool->pool, 0,
				     pthreadpool_tevent_job_fn,
				     job);
	if (num == 0) {
		/*
		 * It was too late to cancel the request.
		 */
		return false;
	}

	/*
	 * It was not too late to cancel the request.
	 *
	 * We can remove job->im, as it will never be used.
	 */
	TALLOC_FREE(job->im);

	/*
	 * pthreadpool_tevent_job_cleanup()
	 * will destroy the job.
	 */
	tevent_req_defer_callback(req, state->ev);
	tevent_req_error(req, ECANCELED);
	return true;
}

int pthreadpool_tevent_job_recv(struct tevent_req *req)
{
	return tevent_req_simple_recv_unix(req);
}