2 * Unix SMB/CIFS implementation.
3 * threadpool implementation based on pthreads
4 * Copyright (C) Volker Lendecke 2009,2011
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 3 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
21 #include "system/select.h"
22 #include "system/threads.h"
23 #include "system/filesys.h"
24 #include "pthreadpool_tevent.h"
25 #include "pthreadpool.h"
26 #include "lib/util/tevent_unix.h"
27 #include "lib/util/dlinklist.h"
28 #include "lib/util/attr.h"
31 * We try to give some hints to helgrind/drd
33 * Note ANNOTATE_BENIGN_RACE_SIZED(address, size, description)
34 * takes a memory address range that is ignored by helgrind/drd
35 * 'description' is just ignored...
38 * Note that ANNOTATE_HAPPENS_*(unique_uintptr)
39 * just takes a DWORD/(void *) as unique key
42 #ifdef HAVE_VALGRIND_HELGRIND_H
43 #include <valgrind/helgrind.h>
/*
 * Fallbacks for the annotation macros: each one that is not already
 * defined at this point (for example by <valgrind/helgrind.h>) is
 * defined with an empty expansion.
 */
45 #ifndef ANNOTATE_BENIGN_RACE_SIZED
46 #define ANNOTATE_BENIGN_RACE_SIZED(address, size, describtion)
48 #ifndef ANNOTATE_HAPPENS_BEFORE
49 #define ANNOTATE_HAPPENS_BEFORE(unique_uintptr)
51 #ifndef ANNOTATE_HAPPENS_AFTER
52 #define ANNOTATE_HAPPENS_AFTER(unique_uintptr)
54 #ifndef ANNOTATE_HAPPENS_BEFORE_FORGET_ALL
55 #define ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(unique_uintptr)
/*
 * Annotate __job->needs_fence (address and size) as a benign race for
 * helgrind/drd. The _UNUSED_ local only gives the annotation a typed
 * pointer to work with; when ANNOTATE_BENIGN_RACE_SIZED() has the empty
 * fallback expansion nothing else is left of this macro.
 */
58 #define PTHREAD_TEVENT_JOB_THREAD_FENCE_INIT(__job) do { \
59 _UNUSED_ const struct pthreadpool_tevent_job *__j = __job; \
60 ANNOTATE_BENIGN_RACE_SIZED(&__j->needs_fence, \
61 sizeof(__j->needs_fence), \
62 "race by design, protected by fence"); \
/*
 * Low-level fence primitive, selected at build time:
 * with WITH_PTHREADPOOL it issues atomic_thread_fence(__order);
 * the other definition (used with pthreadpool_sync.c) expands to an
 * empty statement.
 */
65 #ifdef WITH_PTHREADPOOL
67 * configure checked we have pthread and atomic_thread_fence() available
69 #define __PTHREAD_TEVENT_JOB_THREAD_FENCE(__order) do { \
70 atomic_thread_fence(__order); \
74 * we're using lib/pthreadpool/pthreadpool_sync.c ...
76 #define __PTHREAD_TEVENT_JOB_THREAD_FENCE(__order) do { } while(0)
/*
 * Fence used around every access to __job->needs_fence.
 *
 * Issues __PTHREAD_TEVENT_JOB_THREAD_FENCE(memory_order_seq_cst),
 * bracketed by ANNOTATE_HAPPENS_BEFORE()/ANNOTATE_HAPPENS_AFTER() keyed
 * on the address of __job->needs_fence.
 */
82 #define PTHREAD_TEVENT_JOB_THREAD_FENCE(__job) do { \
83 _UNUSED_ const struct pthreadpool_tevent_job *__j = __job; \
84 ANNOTATE_HAPPENS_BEFORE(&__job->needs_fence); \
85 __PTHREAD_TEVENT_JOB_THREAD_FENCE(memory_order_seq_cst); \
86 ANNOTATE_HAPPENS_AFTER(&__job->needs_fence); \
/*
 * Counterpart of PTHREAD_TEVENT_JOB_THREAD_FENCE_INIT(): issues
 * ANNOTATE_HAPPENS_BEFORE_FORGET_ALL() for the address of
 * __job->needs_fence. Used by the job destructor right before the job
 * goes away.
 */
89 #define PTHREAD_TEVENT_JOB_THREAD_FENCE_FINI(__job) do { \
90 _UNUSED_ const struct pthreadpool_tevent_job *__j = __job; \
91 ANNOTATE_HAPPENS_BEFORE_FORGET_ALL(&__job->needs_fence); \
/* Forward declaration; struct pthreadpool_tevent_glue points to it. */
94 struct pthreadpool_tevent_job_state;
97 * We need one pthreadpool_tevent_glue object per unique combination of tevent
98 * contexts and pthreadpool_tevent objects. Maintain a list of used tevent
99 * contexts in a pthreadpool_tevent.
/*
 * One entry of a pool's glue_list: ties a tevent_context (and the
 * tevent_threaded_context created for it) to the pool and keeps the
 * list of request states that were registered for that context.
 */
101 struct pthreadpool_tevent_glue {
102 struct pthreadpool_tevent_glue *prev, *next; /* glue_list linkage */
103 struct pthreadpool_tevent *pool; /* back-pointer to owning object. */
104 /* Tuple we are keeping track of in this list. */
105 struct tevent_context *ev;
106 struct tevent_threaded_context *tctx;
107 /* Pointer to link object owned by *ev. */
108 struct pthreadpool_tevent_glue_ev_link *ev_link;
110 struct pthreadpool_tevent_job_state *states; /* states registered for ev */
114 * The pthreadpool_tevent_glue_ev_link and its destructor ensure we remove the
115 * tevent context from our list of active event contexts if the event context
117 * This structure is talloc()'ed from the struct tevent_context *, and is a
118 * back-pointer allowing the related struct pthreadpool_tevent_glue object
119 * to be removed from the struct pthreadpool_tevent glue list if the owning
120 * tevent_context is talloc_free()'ed.
/*
 * Link object pointing back at a glue. The glue destructor sets the
 * pointer to NULL before freeing the link; the link destructor frees
 * whatever glue is still referenced.
 */
122 struct pthreadpool_tevent_glue_ev_link {
123 struct pthreadpool_tevent_glue *glue;
/*
 * tevent wrapper around a struct pthreadpool.
 *
 * pool:      the underlying pool filled in by pthreadpool_init().
 * glue_list: one glue per registered tevent_context.
 * jobs:      jobs currently attached to this pool.
 */
126 struct pthreadpool_tevent {
127 struct pthreadpool *pool;
128 struct pthreadpool_tevent_glue *glue_list;
130 struct pthreadpool_tevent_job *jobs;
/*
 * Per-request state of pthreadpool_tevent_job_send().
 *
 * prev/next link the state into glue->states; glue is reset to NULL
 * when the job is orphaned; job is NULL while no job is attached.
 */
133 struct pthreadpool_tevent_job_state {
134 struct pthreadpool_tevent_job_state *prev, *next;
135 struct pthreadpool_tevent_glue *glue;
136 struct tevent_context *ev;
137 struct tevent_req *req;
138 struct pthreadpool_tevent_job *job;
/*
 * Per-job bookkeeping.
 *
 * A job is linked into pool->jobs while it is attached to a pool and
 * into the global orphaned_jobs list once it has been orphaned. The
 * flags described in the comments of this structure are exchanged
 * between the main thread and the job thread and are always accessed
 * together with PTHREAD_TEVENT_JOB_THREAD_FENCE().
 */
141 struct pthreadpool_tevent_job {
142 struct pthreadpool_tevent_job *prev, *next;
144 struct pthreadpool_tevent *pool;
145 struct pthreadpool_tevent_job_state *state;
146 struct tevent_immediate *im;
148 void (*fn)(void *private_data);
152 * Coordination between threads
154 * There're only one side writing each element
155 * either the main process or the job thread.
157 * The coordination is done by a full memory
158 * barrier using atomic_thread_fence(memory_order_seq_cst)
159 * wrapped in PTHREAD_TEVENT_JOB_THREAD_FENCE()
164 * set when tevent_req_cancel() is called.
165 * (only written by main thread!)
171 * set when talloc_free is called on the job request,
172 * tevent_context or pthreadpool_tevent.
173 * (only written by main thread!)
179 * set when the job is picked up by a worker thread
180 * (only written by job thread!)
186 * set once the job function returned.
187 * (only written by job thread!)
193 * set when pthreadpool_tevent_job_signal() is entered
194 * (only written by job thread!)
200 * set when pthreadpool_tevent_job_signal() leaves with
201 * orphaned already set.
202 * (only written by job thread!)
208 * set when pthreadpool_tevent_job_signal() leaves normal
209 * and the immediate event was scheduled.
210 * (only written by job thread!)
/*
 * Forward declarations, followed by the head of the global list of
 * orphaned jobs. pthreadpool_tevent_job_orphan() appends to that list
 * and the job destructor removes entries from it.
 */
218 static int pthreadpool_tevent_destructor(struct pthreadpool_tevent *pool);
220 static void pthreadpool_tevent_job_orphan(struct pthreadpool_tevent_job *job);
222 static struct pthreadpool_tevent_job *orphaned_jobs;
/*
 * Walk the global orphaned_jobs list from its head, using a saved
 * next pointer (njob) so the current entry may leave the list while it
 * is being processed. Whether a job stays alive (and in the list) or is
 * removed is decided by the job destructor.
 */
224 void pthreadpool_tevent_cleanup_orphaned_jobs(void)
226 struct pthreadpool_tevent_job *job = NULL;
227 struct pthreadpool_tevent_job *njob = NULL;
229 for (job = orphaned_jobs; job != NULL; job = njob) {
233 * The job destructor keeps the job alive
234 * (and in the list) or removes it from the list.
/*
 * Forward declaration: pthreadpool_tevent_init() passes this function
 * to pthreadpool_init().
 */
240 static int pthreadpool_tevent_job_signal(int jobid,
241 void (*job_fn)(void *private_data),
242 void *job_private_data,
/*
 * Create a pthreadpool_tevent object.
 *
 * Runs pthreadpool_tevent_cleanup_orphaned_jobs() first, allocates the
 * zeroed wrapper on mem_ctx, initializes the underlying pthreadpool
 * with pthreadpool_tevent_job_signal() as signal function (the wrapper
 * is passed as the last argument of pthreadpool_init()) and installs
 * pthreadpool_tevent_destructor() on the wrapper.
 *
 * mem_ctx:     talloc parent of the new object.
 * max_threads: passed unchanged to pthreadpool_init().
 * presult:     presumably receives the new object on success
 *              (NOTE(review): confirm against the full function body).
 */
245 int pthreadpool_tevent_init(TALLOC_CTX *mem_ctx, unsigned max_threads,
246 struct pthreadpool_tevent **presult)
248 struct pthreadpool_tevent *pool;
251 pthreadpool_tevent_cleanup_orphaned_jobs();
253 pool = talloc_zero(mem_ctx, struct pthreadpool_tevent);
258 ret = pthreadpool_init(max_threads, &pool->pool,
259 pthreadpool_tevent_job_signal, pool);
265 talloc_set_destructor(pool, pthreadpool_tevent_destructor);
/*
 * Return pthreadpool_max_threads() of the underlying pool. A wrapper
 * whose pool pointer is NULL is handled by the check in front of that
 * call.
 */
271 size_t pthreadpool_tevent_max_threads(struct pthreadpool_tevent *pool)
273 if (pool->pool == NULL) {
277 return pthreadpool_max_threads(pool->pool);
/*
 * Return pthreadpool_queued_jobs() of the underlying pool. A wrapper
 * whose pool pointer is NULL is handled by the check in front of that
 * call.
 */
280 size_t pthreadpool_tevent_queued_jobs(struct pthreadpool_tevent *pool)
282 if (pool->pool == NULL) {
286 return pthreadpool_queued_jobs(pool->pool);
/*
 * Return pthreadpool_per_thread_cwd() of the underlying pool. A wrapper
 * whose pool pointer is NULL is handled by the check in front of that
 * call.
 */
289 bool pthreadpool_tevent_per_thread_cwd(struct pthreadpool_tevent *pool)
291 if (pool->pool == NULL) {
295 return pthreadpool_per_thread_cwd(pool->pool);
/*
 * talloc destructor for struct pthreadpool_tevent.
 *
 * Order of operations:
 *  1. pthreadpool_stop() on the underlying pool.
 *  2. Orphan every job still on pool->jobs;
 *     pthreadpool_tevent_job_orphan() unlinks each job from that list.
 *  3. Drain pool->glue_list. The loop re-reads the list head on every
 *     iteration because the glue destructor removes the entry.
 *  4. pthreadpool_destroy() on the underlying pool.
 *  5. pthreadpool_tevent_cleanup_orphaned_jobs().
 */
298 static int pthreadpool_tevent_destructor(struct pthreadpool_tevent *pool)
300 struct pthreadpool_tevent_job *job = NULL;
301 struct pthreadpool_tevent_job *njob = NULL;
302 struct pthreadpool_tevent_glue *glue = NULL;
305 ret = pthreadpool_stop(pool->pool);
310 for (job = pool->jobs; job != NULL; job = njob) {
313 /* The job this removes it from the list */
314 pthreadpool_tevent_job_orphan(job);
318 * Delete all the registered
319 * tevent_context/tevent_threaded_context
322 for (glue = pool->glue_list; glue != NULL; glue = pool->glue_list) {
323 /* The glue destructor removes it from the list */
326 pool->glue_list = NULL;
328 ret = pthreadpool_destroy(pool->pool);
334 pthreadpool_tevent_cleanup_orphaned_jobs();
/*
 * talloc destructor for struct pthreadpool_tevent_glue.
 *
 * Orphans the job of every request state on glue->states (the next
 * pointer is saved first, because orphaning unlinks the state), unlinks
 * the glue from glue->pool->glue_list when that list is not empty,
 * clears ev_link->glue so the ev_link destructor has nothing left to
 * free, and then frees the ev_link and the tevent_threaded_context.
 */
339 static int pthreadpool_tevent_glue_destructor(
340 struct pthreadpool_tevent_glue *glue)
342 struct pthreadpool_tevent_job_state *state = NULL;
343 struct pthreadpool_tevent_job_state *nstate = NULL;
345 for (state = glue->states; state != NULL; state = nstate) {
346 nstate = state->next;
348 /* The job this removes it from the list */
349 pthreadpool_tevent_job_orphan(state->job);
352 if (glue->pool->glue_list != NULL) {
353 DLIST_REMOVE(glue->pool->glue_list, glue);
356 /* Ensure the ev_link destructor knows we're gone */
357 glue->ev_link->glue = NULL;
359 TALLOC_FREE(glue->ev_link);
360 TALLOC_FREE(glue->tctx);
366 * Destructor called either explicitly from
367 * pthreadpool_tevent_glue_destructor(), or indirectly
368 * when owning tevent_context is destroyed.
370 * When called from pthreadpool_tevent_glue_destructor()
371 * ev_link->glue is already NULL, so this does nothing.
373 * When called from talloc_free() of the owning
374 * tevent_context we must ensure we also remove the
375 * linked glue object from the list inside
376 * struct pthreadpool_tevent.
/*
 * talloc destructor for the ev_link object: frees the glue that
 * ev_link->glue still points to (a NULL pointer makes this a no-op).
 */
378 static int pthreadpool_tevent_glue_link_destructor(
379 struct pthreadpool_tevent_glue_ev_link *ev_link)
381 TALLOC_FREE(ev_link->glue);
/*
 * Attach a request state to the glue object belonging to state->ev.
 *
 * If pool->glue_list already contains a glue for this tevent_context,
 * the state is appended to that glue's states list. Otherwise a new
 * glue is allocated on pool and gets its destructor, an ev_link is
 * allocated on the tevent_context and pointed back at the glue, a
 * tevent_threaded_context is created as child of the glue (on failure
 * the ev_link is freed again), the state is appended to the new glue
 * and the glue is added to pool->glue_list.
 *
 * The int result is handed to tevent_req_error() by
 * pthreadpool_tevent_job_send().
 */
385 static int pthreadpool_tevent_register_ev(
386 struct pthreadpool_tevent *pool,
387 struct pthreadpool_tevent_job_state *state)
389 struct tevent_context *ev = state->ev;
390 struct pthreadpool_tevent_glue *glue = NULL;
391 struct pthreadpool_tevent_glue_ev_link *ev_link = NULL;
394 * See if this tevent_context was already registered by
395 * searching the glue object list. If so we have nothing
396 * to do here - we already have a tevent_context/tevent_threaded_context
399 for (glue = pool->glue_list; glue != NULL; glue = glue->next) {
400 if (glue->ev == state->ev) {
402 DLIST_ADD_END(glue->states, state);
408 * Event context not yet registered - create a new glue
409 * object containing a tevent_context/tevent_threaded_context
410 * pair and put it on the list to remember this registration.
411 * We also need a link object to ensure the event context
412 * can't go away without us knowing about it.
414 glue = talloc_zero(pool, struct pthreadpool_tevent_glue);
418 *glue = (struct pthreadpool_tevent_glue) {
422 talloc_set_destructor(glue, pthreadpool_tevent_glue_destructor);
425 * Now allocate the link object to the event context. Note this
426 * is allocated OFF THE EVENT CONTEXT ITSELF, so if the event
427 * context is freed we are able to cleanup the glue object
428 * in the link object destructor.
431 ev_link = talloc_zero(ev, struct pthreadpool_tevent_glue_ev_link);
432 if (ev_link == NULL) {
436 ev_link->glue = glue;
437 talloc_set_destructor(ev_link, pthreadpool_tevent_glue_link_destructor);
439 glue->ev_link = ev_link;
442 glue->tctx = tevent_threaded_context_create(glue, ev);
443 if (glue->tctx == NULL) {
444 TALLOC_FREE(ev_link);
451 DLIST_ADD_END(glue->states, state);
453 DLIST_ADD(pool->glue_list, glue);
/*
 * Forward declarations of the job callbacks: the function queued into
 * the pool, the immediate handler scheduled by the signal function, and
 * the request's cancel function.
 */
457 static void pthreadpool_tevent_job_fn(void *private_data);
458 static void pthreadpool_tevent_job_done(struct tevent_context *ctx,
459 struct tevent_immediate *im,
461 static bool pthreadpool_tevent_job_cancel(struct tevent_req *req);
/*
 * talloc destructor for struct pthreadpool_tevent_job.
 *
 * Expected to run only for jobs already marked needs_fence.orphaned.
 * While job->im and job->pool are both set it tries
 * pthreadpool_cancel_job(); a successful cancel means job->im will
 * never be used and can be freed. After a fence it also frees job->im
 * when the signal function reported needs_fence.dropped. A job that
 * still owns job->im afterwards has to wait for the immediate event and
 * is kept on the orphaned list; otherwise the job is removed from
 * orphaned_jobs and PTHREAD_TEVENT_JOB_THREAD_FENCE_FINI() is issued.
 */
463 static int pthreadpool_tevent_job_destructor(struct pthreadpool_tevent_job *job)
466 * We should never be called with needs_fence.orphaned == false.
467 * Only pthreadpool_tevent_job_orphan() will call TALLOC_FREE(job)
468 * after detaching from the request state, glue and pool list.
470 if (!job->needs_fence.orphaned) {
475 * If the job is not finished (job->im still there)
476 * and it's still attached to the pool,
477 * we try to cancel it (before it was starts)
479 if (job->im != NULL && job->pool != NULL) {
482 num = pthreadpool_cancel_job(job->pool->pool, 0,
483 pthreadpool_tevent_job_fn,
487 * It was not too late to cancel the request.
489 * We can remove job->im, as it will never be used.
491 TALLOC_FREE(job->im);
495 PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
496 if (job->needs_fence.dropped) {
498 * The signal function saw job->needs_fence.orphaned
499 * before it started the signaling via the immediate
500 * event. So we'll never geht triggered and can
501 * remove job->im and let the whole job go...
503 TALLOC_FREE(job->im);
507 * TODO?: We could further improve this by adjusting
508 * tevent_threaded_schedule_immediate_destructor()
509 * and allow TALLOC_FREE() during its time
510 * in the main_ev->scheduled_immediates list.
512 * PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
513 * if (state->needs_fence.signaled) {
515 * * The signal function is completed
516 * * in future we may be allowed
517 * * to call TALLOC_FREE(job->im).
519 * TALLOC_FREE(job->im);
524 * pthreadpool_tevent_job_orphan() already removed
525 * it from pool->jobs. And we don't need try
526 * pthreadpool_cancel_job() again.
530 if (job->im != NULL) {
532 * state->im still there means, we need to wait for the
533 * immediate event to be triggered or just leak the memory.
535 * Move it to the orphaned list, if it's not already there.
541 * Finally remove from the orphaned_jobs list
542 * and let talloc destroy us.
544 DLIST_REMOVE(orphaned_jobs, job);
546 PTHREAD_TEVENT_JOB_THREAD_FENCE_FINI(job);
/*
 * Detach a job from its request state, glue and pool.
 *
 * Sets needs_fence.orphaned and issues a fence first, so the job thread
 * can observe it. If job->state is set, it waits (fence plus re-check
 * of needs_fence.finished/dropped/signaled) until
 * pthreadpool_tevent_job_signal() can no longer be dereferencing
 * job->state->glue, then unlinks the state from glue->states,
 * reparents the job from the state to the NULL talloc context and
 * clears state->job. If job->pool is still set, the job is unlinked
 * from pool->jobs and appended to orphaned_jobs.
 */
550 static void pthreadpool_tevent_job_orphan(struct pthreadpool_tevent_job *job)
552 job->needs_fence.orphaned = true;
553 PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
556 * We're the only function that sets
559 if (job->state == NULL) {
564 * Once we marked the request as 'orphaned'
565 * we spin/loop if it's already marked
566 * as 'finished' (which means that
567 * pthreadpool_tevent_job_signal() was entered.
568 * If it saw 'orphaned' it will exit after setting
569 * 'dropped', otherwise it dereferences
570 * job->state->glue->{tctx,ev} until it exited
571 * after setting 'signaled'.
573 * We need to close this potential gab before
574 * we can set job->state = NULL.
576 * This is some kind of spinlock, but with
577 * 1 millisecond sleeps in between, in order
578 * to give the thread more cpu time to finish.
580 PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
581 while (job->needs_fence.finished) {
582 if (job->needs_fence.dropped) {
585 if (job->needs_fence.signaled) {
589 PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
593 * Once the gab is closed, we can remove
596 DLIST_REMOVE(job->state->glue->states, job->state);
597 job->state->glue = NULL;
600 * We need to reparent to a long term context.
601 * And detach from the request state.
602 * Maybe the destructor will keep the memory
603 * and leak it for now.
605 (void)talloc_reparent(job->state, NULL, job);
606 job->state->job = NULL;
610 * job->pool will only be set to NULL
611 * in the first destructur run.
613 if (job->pool == NULL) {
618 * Dettach it from the pool.
620 * The job might still be running,
621 * so we keep job->pool.
622 * The destructor will set it to NULL
623 * after trying pthreadpool_cancel_job()
625 DLIST_REMOVE(job->pool->jobs, job);
628 * Add it to the list of orphaned jobs,
629 * which may be cleaned up later.
631 * The destructor removes it from the list
632 * when possible or it denies the free
633 * and keep it in the list.
635 DLIST_ADD_END(orphaned_jobs, job);
/*
 * tevent_req cleanup function, installed by
 * pthreadpool_tevent_job_send().
 *
 * Without a job, the state is only unlinked from its glue (if it has
 * one). With a job, the job is handed to
 * pthreadpool_tevent_job_orphan() and state->job is cleared.
 */
639 static void pthreadpool_tevent_job_cleanup(struct tevent_req *req,
640 enum tevent_req_state req_state)
642 struct pthreadpool_tevent_job_state *state =
644 struct pthreadpool_tevent_job_state);
646 if (state->job == NULL) {
648 * The job request is not scheduled in the pool
651 if (state->glue != NULL) {
652 DLIST_REMOVE(state->glue->states, state);
659 * We need to reparent to a long term context.
660 * Maybe the destructor will keep the memory
661 * and leak it for now.
663 pthreadpool_tevent_job_orphan(state->job);
664 state->job = NULL; /* not needed but looks better */
/*
 * Queue a job for fn/private_data on pool and return a tevent_req
 * that tracks it.
 *
 * Steps: clean up orphaned jobs, create the request with a
 * struct pthreadpool_tevent_job_state, install
 * pthreadpool_tevent_job_cleanup(), register ev with the pool through
 * pthreadpool_tevent_register_ev(), allocate the job as talloc child of
 * the state together with its tevent_immediate, record whether the pool
 * uses a per-thread cwd, install the job destructor, link the job into
 * pool->jobs, queue it with pthreadpool_add_job() and finally install
 * pthreadpool_tevent_job_cancel() as cancel function.
 *
 * Failures after the request was created are reported through the
 * request (tevent_req_error()/tevent_req_nomem() followed by
 * tevent_req_post()), e.g. EINVAL when pool->pool is NULL.
 */
668 struct tevent_req *pthreadpool_tevent_job_send(
669 TALLOC_CTX *mem_ctx, struct tevent_context *ev,
670 struct pthreadpool_tevent *pool,
671 void (*fn)(void *private_data), void *private_data)
673 struct tevent_req *req = NULL;
674 struct pthreadpool_tevent_job_state *state = NULL;
675 struct pthreadpool_tevent_job *job = NULL;
678 pthreadpool_tevent_cleanup_orphaned_jobs();
680 req = tevent_req_create(mem_ctx, &state,
681 struct pthreadpool_tevent_job_state);
688 tevent_req_set_cleanup_fn(req, pthreadpool_tevent_job_cleanup);
691 tevent_req_error(req, EINVAL);
692 return tevent_req_post(req, ev);
694 if (pool->pool == NULL) {
695 tevent_req_error(req, EINVAL);
696 return tevent_req_post(req, ev);
699 ret = pthreadpool_tevent_register_ev(pool, state);
700 if (tevent_req_error(req, ret)) {
701 return tevent_req_post(req, ev);
704 job = talloc_zero(state, struct pthreadpool_tevent_job);
705 if (tevent_req_nomem(job, req)) {
706 return tevent_req_post(req, ev);
710 job->private_data = private_data;
711 job->im = tevent_create_immediate(state->job);
712 if (tevent_req_nomem(job->im, req)) {
713 return tevent_req_post(req, ev);
715 PTHREAD_TEVENT_JOB_THREAD_FENCE_INIT(job);
716 job->per_thread_cwd = pthreadpool_tevent_per_thread_cwd(pool);
717 talloc_set_destructor(job, pthreadpool_tevent_job_destructor);
718 DLIST_ADD_END(job->pool->jobs, job);
722 ret = pthreadpool_add_job(job->pool->pool, 0,
723 pthreadpool_tevent_job_fn,
725 if (tevent_req_error(req, ret)) {
726 return tevent_req_post(req, ev);
729 tevent_req_set_cancel_fn(req, pthreadpool_tevent_job_cancel);
/*
 * Per-thread pointer to the job, read by the
 * pthreadpool_tevent_current_job_*() helpers below. Those helpers
 * treat NULL as "not called from within a job".
 */
733 static __thread struct pthreadpool_tevent_job *current_job;
/*
 * Report needs_fence.maycancel of the calling thread's current job,
 * read after a fence. The flag is set by
 * pthreadpool_tevent_job_cancel(). A NULL current_job is handled by the
 * check at the top.
 */
735 bool pthreadpool_tevent_current_job_canceled(void)
737 if (current_job == NULL) {
739 * Should only be called from within
746 PTHREAD_TEVENT_JOB_THREAD_FENCE(current_job);
747 return current_job->needs_fence.maycancel;
/*
 * Report needs_fence.orphaned of the calling thread's current job,
 * read after a fence. The flag is set by
 * pthreadpool_tevent_job_orphan(). A NULL current_job is handled by the
 * check at the top.
 */
750 bool pthreadpool_tevent_current_job_orphaned(void)
752 if (current_job == NULL) {
754 * Should only be called from within
761 PTHREAD_TEVENT_JOB_THREAD_FENCE(current_job);
762 return current_job->needs_fence.orphaned;
/*
 * Combined check for the calling thread's current job: looks at
 * needs_fence.maycancel and then at needs_fence.orphaned, each after
 * its own fence. A NULL current_job is handled by the check at the top.
 */
765 bool pthreadpool_tevent_current_job_continue(void)
767 if (current_job == NULL) {
769 * Should only be called from within
776 PTHREAD_TEVENT_JOB_THREAD_FENCE(current_job);
777 if (current_job->needs_fence.maycancel) {
780 PTHREAD_TEVENT_JOB_THREAD_FENCE(current_job);
781 if (current_job->needs_fence.orphaned) {
/*
 * Return the per_thread_cwd value stored in the calling thread's
 * current job; pthreadpool_tevent_job_send() copies it from
 * pthreadpool_tevent_per_thread_cwd() when the job is created. No fence
 * is used for this field. A NULL current_job is handled by the check at
 * the top.
 */
788 bool pthreadpool_tevent_current_job_per_thread_cwd(void)
790 if (current_job == NULL) {
792 * Should only be called from within
799 return current_job->per_thread_cwd;
/*
 * Function queued into the pool by pthreadpool_add_job().
 *
 * private_data must be a struct pthreadpool_tevent_job
 * (talloc_get_type_abort() aborts otherwise). Sets needs_fence.started,
 * runs job->fn(job->private_data) and then sets needs_fence.executed;
 * each flag write is followed by a fence.
 */
802 static void pthreadpool_tevent_job_fn(void *private_data)
804 struct pthreadpool_tevent_job *job =
805 talloc_get_type_abort(private_data,
806 struct pthreadpool_tevent_job);
809 job->needs_fence.started = true;
810 PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
812 job->fn(job->private_data);
814 job->needs_fence.executed = true;
815 PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
/*
 * Signal function registered with pthreadpool_init().
 *
 * Sets needs_fence.finished and fences. If the job is already orphaned
 * it only sets needs_fence.dropped (the request is gone). Otherwise it
 * schedules pthreadpool_tevent_job_done() through job->im -- with
 * tevent_threaded_schedule_immediate() when the glue has a
 * tevent_threaded_context, else with tevent_schedule_immediate() on the
 * glue's tevent_context -- and then sets needs_fence.signaled.
 *
 * job_private_data must be a struct pthreadpool_tevent_job.
 */
819 static int pthreadpool_tevent_job_signal(int jobid,
820 void (*job_fn)(void *private_data),
821 void *job_private_data,
824 struct pthreadpool_tevent_job *job =
825 talloc_get_type_abort(job_private_data,
826 struct pthreadpool_tevent_job);
828 job->needs_fence.finished = true;
829 PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
830 if (job->needs_fence.orphaned) {
831 /* Request already gone */
832 job->needs_fence.dropped = true;
833 PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
838 * state and state->glue are valid,
839 * see the job->needs_fence.finished
841 * pthreadpool_tevent_job_orphan()
843 if (job->state->glue->tctx != NULL) {
844 /* with HAVE_PTHREAD */
845 tevent_threaded_schedule_immediate(job->state->glue->tctx,
847 pthreadpool_tevent_job_done,
850 /* without HAVE_PTHREAD */
851 tevent_schedule_immediate(job->im,
852 job->state->glue->ev,
853 pthreadpool_tevent_job_done,
857 job->needs_fence.signaled = true;
858 PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
/*
 * Immediate handler scheduled by pthreadpool_tevent_job_signal().
 *
 * Frees job->im, then completes the request that owns the job:
 * tevent_req_done() when needs_fence.executed is set,
 * tevent_req_error(ENOEXEC) otherwise. Completing the request triggers
 * pthreadpool_tevent_job_cleanup(), which destroys the job.
 */
862 static void pthreadpool_tevent_job_done(struct tevent_context *ctx,
863 struct tevent_immediate *im,
866 struct pthreadpool_tevent_job *job =
867 talloc_get_type_abort(private_data,
868 struct pthreadpool_tevent_job);
869 struct pthreadpool_tevent_job_state *state = job->state;
871 TALLOC_FREE(job->im);
874 /* Request already gone */
880 * pthreadpool_tevent_job_cleanup()
881 * (called by tevent_req_done() or
882 * tevent_req_error()) will destroy the job.
885 if (job->needs_fence.executed) {
886 tevent_req_done(state->req);
890 tevent_req_error(state->req, ENOEXEC);
/*
 * Cancel function of the job request.
 *
 * Sets needs_fence.maycancel and fences. If the job already started it
 * is too late to cancel; the running job can still poll
 * pthreadpool_tevent_current_job_canceled() or
 * pthreadpool_tevent_current_job_continue(). Otherwise
 * pthreadpool_cancel_job() is tried. When the job could still be
 * removed from the pool, job->im is freed and the request fails with
 * ECANCELED, with its callback deferred through
 * tevent_req_defer_callback(req, state->ev).
 */
894 static bool pthreadpool_tevent_job_cancel(struct tevent_req *req)
896 struct pthreadpool_tevent_job_state *state =
898 struct pthreadpool_tevent_job_state);
899 struct pthreadpool_tevent_job *job = state->job;
906 job->needs_fence.maycancel = true;
907 PTHREAD_TEVENT_JOB_THREAD_FENCE(job);
908 if (job->needs_fence.started) {
910 * It was too late to cancel the request.
912 * The job still has the chance to look
913 * at pthreadpool_tevent_current_job_canceled()
914 * or pthreadpool_tevent_current_job_continue()
919 num = pthreadpool_cancel_job(job->pool->pool, 0,
920 pthreadpool_tevent_job_fn,
924 * It was too late to cancel the request.
930 * It was not too late to cancel the request.
932 * We can remove job->im, as it will never be used.
934 TALLOC_FREE(job->im);
937 * pthreadpool_tevent_job_cleanup()
938 * will destroy the job.
940 tevent_req_defer_callback(req, state->ev);
941 tevent_req_error(req, ECANCELED);
/*
 * Receive the result of a request created by
 * pthreadpool_tevent_job_send(): returns whatever
 * tevent_req_simple_recv_unix() reports for req.
 */
945 int pthreadpool_tevent_job_recv(struct tevent_req *req)
947 return tevent_req_simple_recv_unix(req);