s3: Avoid a potential 100% CPU loop in winbindd
author Volker Lendecke <vl@samba.org>
Fri, 29 Apr 2011 11:00:14 +0000 (13:00 +0200)
committer Volker Lendecke <vlendec@samba.org>
Fri, 29 Apr 2011 15:58:19 +0000 (17:58 +0200)
In the clustering case if ctdb is unhappy, winbindd_reinit_after_fork fails.
This can lead to an endless loop depending on the scheduling of the parent vs
child. Parent forks, child is immediately scheduled and exits. Parent gets
SIGCHLD, parent is then scheduled before it sends the request out to the child.
Parent tries to fork again immediately.

The code before this patch did not really take into account that
reinit_after_fork can fail. The code now sends the result of
winbindd_reinit_after_fork to the parent and the parent only considers the
child alive when it got NT_STATUS_OK.

This was seen in 3.4 winbind. winbind has changed significantly since then, so
it might be possible that this does not happen anymore in exactly this way. But
passing up the status of reinit_after_fork and only considering the child alive
when that's ok is the correct thing to do anyway.

Autobuild-User: Volker Lendecke <vlendec@samba.org>
Autobuild-Date: Fri Apr 29 17:58:19 CEST 2011 on sn-devel-104

source3/winbindd/winbindd_dual.c

index 8df6708778bf62346e7710d425c9824685fb628a..ebafe8f3f02051f20c5ceeb1099841e843a08770 100644 (file)
@@ -1292,6 +1292,7 @@ static bool fork_domain_child(struct winbindd_child *child)
        struct winbindd_response response;
        struct winbindd_domain *primary_domain = NULL;
        NTSTATUS status;
+       ssize_t nwritten;
 
        if (child->domain) {
                DEBUG(10, ("fork_domain_child called for domain '%s'\n",
@@ -1320,7 +1321,25 @@ static bool fork_domain_child(struct winbindd_child *child)
 
        if (child->pid != 0) {
                /* Parent */
+               ssize_t nread;
+
                close(fdpair[0]);
+
+               nread = read(fdpair[1], &status, sizeof(status));
+               if (nread != sizeof(status)) {
+                       DEBUG(1, ("fork_domain_child: Could not read child status: "
+                                 "nread=%d, error=%s\n", (int)nread,
+                                 strerror(errno)));
+                       close(fdpair[1]);
+                       return false;
+               }
+               if (!NT_STATUS_IS_OK(status)) {
+                       DEBUG(1, ("fork_domain_child: Child status is %s\n",
+                                 nt_errstr(status)));
+                       close(fdpair[1]);
+                       return false;
+               }
+
                child->next = child->prev = NULL;
                DLIST_ADD(winbindd_children, child);
                child->sock = fdpair[1];
@@ -1336,6 +1355,14 @@ static bool fork_domain_child(struct winbindd_child *child)
        close(fdpair[1]);
 
        status = winbindd_reinit_after_fork(child, child->logfilename);
+
+       nwritten = write(state.sock, &status, sizeof(status));
+       if (nwritten != sizeof(status)) {
+               DEBUG(1, ("fork_domain_child: Could not write status: "
+                         "nwritten=%d, error=%s\n", (int)nwritten,
+                         strerror(errno)));
+               _exit(0);
+       }
        if (!NT_STATUS_IS_OK(status)) {
                DEBUG(1, ("winbindd_reinit_after_fork failed: %s\n",
                          nt_errstr(status)));