Here is a re-working of the winbindd
author Jeremy Allison <jra@samba.org>
Wed, 20 Aug 2008 23:24:22 +0000 (16:24 -0700)
committer Jeremy Allison <jra@samba.org>
Wed, 20 Aug 2008 23:24:22 +0000 (16:24 -0700)
reconnect code to cope with rebooting a DC. This
replaces the code I asked Volker to revert.
The logic is pretty simple. It adds a new parameter,
"winbind reconnect delay", set to 30 seconds by
default, which determines how long to wait between
connection attempts.
To avoid overwhelming the box with DC-probe
forked children, the code now keeps track of
the DC probe child per winbindd_domain struct
and only starts a new one if the existing one
has died.
I also added a little logic to make sure the
DC probe child always sends a message, whatever
the reason for its exit, so we will always reschedule
another connect attempt.
Also added documentation.
Jeremy.
(This used to be commit 8027197635b988b3dcf9d3d00126a024e768fa62)

docs-xml/smbdotconf/winbind/winbindreconnectdelay.xml [new file with mode: 0644]
source3/include/proto.h
source3/param/loadparm.c
source3/winbindd/winbindd.h
source3/winbindd/winbindd_cm.c
source3/winbindd/winbindd_util.c

diff --git a/docs-xml/smbdotconf/winbind/winbindreconnectdelay.xml b/docs-xml/smbdotconf/winbind/winbindreconnectdelay.xml
new file mode 100644 (file)
index 0000000..2da263e
--- /dev/null
@@ -0,0 +1,15 @@
+<samba:parameter name="winbind reconnect delay"
+                 context="G"
+                                type="integer"
+                 advanced="1" developer="1"
+                 xmlns:samba="http://www.samba.org/samba/DTD/samba-doc">
+<description>
+       <para>This parameter specifies the number of
+       seconds the <citerefentry><refentrytitle>winbindd</refentrytitle>
+       <manvolnum>8</manvolnum></citerefentry> daemon will wait between
+       attempts to contact a Domain controller for a domain that is
+       determined to be down or not contactable.</para>
+</description>
+
+<value type="default">30</value>
+</samba:parameter>
index 3d0d419022eb4143363b823ab8f115e86220f459..d3a8dbbc7fe93683ef30e26006527de476d2da51 100644 (file)
@@ -6049,6 +6049,7 @@ int lp_directory_name_cache_size(int );
 int lp_smb_encrypt(int );
 char lp_magicchar(const struct share_params *p );
 int lp_winbind_cache_time(void);
+int lp_winbind_reconnect_delay(void);
 const char **lp_winbind_nss_info(void);
 int lp_algorithmic_rid_base(void);
 int lp_name_cache_timeout(void);
index bc111df4e1f701fe4aac2630a6c921ee8997a90f..6817eca1d19b0089c9ab47c8e08b44fde5352515 100644 (file)
@@ -240,6 +240,7 @@ struct global {
        int map_to_guest;
        int oplock_break_wait_time;
        int winbind_cache_time;
+       int winbind_reconnect_delay;
        int winbind_max_idle_children;
        char **szWinbindNssInfo;
        int iLockSpinTime;
@@ -4362,6 +4363,15 @@ static struct parm_struct parm_table[] = {
                .enum_list      = NULL,
                .flags          = FLAG_ADVANCED,
        },
+       {
+               .label          = "winbind reconnect delay",
+               .type           = P_INTEGER,
+               .p_class        = P_GLOBAL,
+               .ptr            = &Globals.winbind_reconnect_delay,
+               .special        = NULL,
+               .enum_list      = NULL,
+               .flags          = FLAG_ADVANCED,
+       },
        {
                .label          = "winbind enum users",
                .type           = P_BOOL,
@@ -4829,6 +4839,7 @@ static void init_globals(bool first_time_only)
        Globals.clustering = False;
 
        Globals.winbind_cache_time = 300;       /* 5 minutes */
+       Globals.winbind_reconnect_delay = 30;   /* 30 seconds */
        Globals.bWinbindEnumUsers = False;
        Globals.bWinbindEnumGroups = False;
        Globals.bWinbindUseDefaultDomain = False;
@@ -5341,6 +5352,7 @@ FN_LOCAL_INTEGER(lp_directory_name_cache_size, iDirectoryNameCacheSize)
 FN_LOCAL_INTEGER(lp_smb_encrypt, ismb_encrypt)
 FN_LOCAL_CHAR(lp_magicchar, magic_char)
 FN_GLOBAL_INTEGER(lp_winbind_cache_time, &Globals.winbind_cache_time)
+FN_GLOBAL_INTEGER(lp_winbind_reconnect_delay, &Globals.winbind_reconnect_delay)
 FN_GLOBAL_LIST(lp_winbind_nss_info, &Globals.szWinbindNssInfo)
 FN_GLOBAL_INTEGER(lp_algorithmic_rid_base, &Globals.AlgorithmicRidBase)
 FN_GLOBAL_INTEGER(lp_name_cache_timeout, &Globals.name_cache_timeout)
index fe0c0762093705fff557018f72a80aa5cfad134f..1b8cd9163f6b1f279eadb85d011ebdfb5f2a3ead 100644 (file)
@@ -204,6 +204,7 @@ struct winbindd_domain {
        uint32_t id_range_low, id_range_high;
 
        /* A working DC */
+       pid_t dc_probe_pid; /* Child we're using to detect the DC. */
        fstring dcname;
        struct sockaddr_storage dcaddr;
 
index 47df4e405893c14ddeec501fb2b5c0c29fe99e9e..69e95b1c05476290ee3d5e19b43c1c0327ed6888 100644 (file)
@@ -171,20 +171,33 @@ static bool fork_child_dc_connect(struct winbindd_domain *domain)
        struct dc_name_ip *dcs = NULL;
        int num_dcs = 0;
        TALLOC_CTX *mem_ctx = NULL;
-       pid_t child_pid;
        pid_t parent_pid = sys_getpid();
 
        /* Stop zombies */
        CatchChild();
 
-       child_pid = sys_fork();
+       if (domain->dc_probe_pid != (pid_t)-1) {
+               /*
+                * We might already have a DC probe
+                * child working, check.
+                */
+               if (process_exists_by_pid(domain->dc_probe_pid)) {
+                       DEBUG(10,("fork_child_dc_connect: pid %u already "
+                               "checking for DC's.\n",
+                               (unsigned int)domain->dc_probe_pid));
+                       return true;
+               }
+               domain->dc_probe_pid = (pid_t)-1;
+       }
 
-       if (child_pid == -1) {
+       domain->dc_probe_pid = sys_fork();
+
+       if (domain->dc_probe_pid == (pid_t)-1) {
                DEBUG(0, ("fork_child_dc_connect: Could not fork: %s\n", strerror(errno)));
                return False;
        }
 
-       if (child_pid != 0) {
+       if (domain->dc_probe_pid != (pid_t)0) {
                /* Parent */
                messaging_register(winbind_messaging_context(), NULL,
                                   MSG_WINBIND_TRY_TO_GO_ONLINE,
@@ -201,6 +214,11 @@ static bool fork_child_dc_connect(struct winbindd_domain *domain)
 
        if (!reinit_after_fork(winbind_messaging_context(), true)) {
                DEBUG(0,("reinit_after_fork() failed\n"));
+               messaging_send_buf(winbind_messaging_context(),
+                                  pid_to_procid(parent_pid),
+                                  MSG_WINBIND_FAILED_TO_GO_ONLINE,
+                                  (uint8 *)domain->name,
+                                  strlen(domain->name)+1);
                _exit(0);
        }
 
@@ -218,6 +236,11 @@ static bool fork_child_dc_connect(struct winbindd_domain *domain)
        mem_ctx = talloc_init("fork_child_dc_connect");
        if (!mem_ctx) {
                DEBUG(0,("talloc_init failed.\n"));
+               messaging_send_buf(winbind_messaging_context(),
+                                  pid_to_procid(parent_pid),
+                                  MSG_WINBIND_FAILED_TO_GO_ONLINE,
+                                  (uint8 *)domain->name,
+                                  strlen(domain->name)+1);
                _exit(0);
        }
 
@@ -291,12 +314,12 @@ static void check_domain_online_handler(struct event_context *ctx,
 
 static void calc_new_online_timeout_check(struct winbindd_domain *domain)
 {
-       int wbc = lp_winbind_cache_time();
+       int wbr = lp_winbind_reconnect_delay();
 
        if (domain->startup) {
                domain->check_online_timeout = 10;
-       } else if (domain->check_online_timeout < wbc) {
-               domain->check_online_timeout = wbc;
+       } else if (domain->check_online_timeout < wbr) {
+               domain->check_online_timeout = wbr;
        }
 }
 
@@ -336,7 +359,7 @@ void set_domain_offline(struct winbindd_domain *domain)
        }
 
        /* If we're in statup mode, check again in 10 seconds, not in
-          lp_winbind_cache_time() seconds (which is 5 mins by default). */
+          lp_winbind_reconnect_delay() seconds (which is 30 seconds by default). */
 
        calc_new_online_timeout_check(domain);
 
index 77b17787c999d7f6032fb71f53e96a18c324e71d..4668d3725dc11b0ec4014681f58a310a288b84d1 100644 (file)
@@ -180,11 +180,11 @@ static struct winbindd_domain *add_trusted_domain(const char *domain_name, const
        domain->initialized = False;
        domain->online = is_internal_domain(sid);
        domain->check_online_timeout = 0;
+       domain->dc_probe_pid = (pid_t)-1;
        if (sid) {
                sid_copy(&domain->sid, sid);
        }
 
-       
        /* Link to domain list */
        DLIST_ADD_END(_domain_list, domain, struct winbindd_domain *);