dsdb: Avoid ERROR(ldb): uncaught exception - Deleted target CN=NTDS Settings... in...
author Andrew Bartlett <abartlet@samba.org>
Thu, 9 Mar 2023 07:25:06 +0000 (20:25 +1300)
committer Andrew Bartlett <abartlet@samba.org>
Tue, 14 Mar 2023 06:16:30 +0000 (06:16 +0000)
"samba-tool domain join" uses the replication API in a strange way, perhaps no longer
required, except that we often still have folks upgrading from very old Samba versions.

When deferring the writing out to the DB of link replication to the very end, there
is a greater opportunity for the deletion of an object to have been sent with the
other objects, and have the link applied later.

This tells the repl_meta_data code to behave as if GET_TGT had been sent at the
time the link was returned, allowing a link to a deleted object to be silently
discarded.

BUG: https://bugzilla.samba.org/show_bug.cgi?id=15329

Signed-off-by: Andrew Bartlett <abartlet@samba.org>
Reviewed-by: Joseph Sutton <josephsutton@catalyst.net.nz>
python/samba/join.py
selftest/knownfail.d/replicate_against_deleted [deleted file]
source4/dsdb/samdb/ldb_modules/repl_meta_data.c

diff --git a/python/samba/join.py b/python/samba/join.py
index 70b3c9729b0b714de437608e68e3e18c75cc375c..6c1ab3be7b46aa7daa6d1540e17bd9eed5036e72 100644 (file)
@@ -50,6 +50,7 @@ import tempfile
 from collections import OrderedDict
 from samba.common import get_string
 from samba.netcmd import CommandError
+from samba import dsdb
 
 
 class DCJoinException(Exception):
@@ -937,6 +938,10 @@ class DCJoinContext(object):
         """Replicate the SAM."""
 
         ctx.logger.info("Starting replication")
+
+        # A global transaction is started so that linked attributes
+        # are applied at the very end, once all partitions are
+        # replicated.  This helps get all cross-partition links.
         ctx.local_samdb.transaction_start()
         try:
             source_dsa_invocation_id = misc.GUID(ctx.samdb.get_invocation_id())
@@ -1057,7 +1062,21 @@ class DCJoinContext(object):
             ctx.local_samdb.transaction_cancel()
             raise
         else:
+
+            # This is a special case, we have completed a full
+            # replication so if a link comes to us that points to a
+            # deleted object, and we asked for all objects already, we
+            # just have to ignore it, the chance to re-try the
+            # replication with GET_TGT has long gone.  This can happen
+            # if the object is deleted and sent to us after the link
+            # was sent, as we are processing all links in the
+            # transaction_commit().
+            if not ctx.domain_replica_flags & drsuapi.DRSUAPI_DRS_CRITICAL_ONLY:
+                ctx.local_samdb.set_opaque_integer(dsdb.DSDB_FULL_JOIN_REPLICATION_COMPLETED_OPAQUE_NAME,
+                                                   1)
             ctx.local_samdb.transaction_commit()
+            ctx.local_samdb.set_opaque_integer(dsdb.DSDB_FULL_JOIN_REPLICATION_COMPLETED_OPAQUE_NAME,
+                                               0)
             ctx.logger.info("Committed SAM database")
 
         # A large replication may have caused our LDB connection to the
diff --git a/selftest/knownfail.d/replicate_against_deleted b/selftest/knownfail.d/replicate_against_deleted
deleted file mode 100644 (file)
index 9caa534..0000000
+++ /dev/null
@@ -1 +0,0 @@
-samba4.drs.ridalloc_exop.python\(.*\).ridalloc_exop.DrsReplicaSyncTestCase.test_replicate_against_deleted_objects_transaction
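diff --git a/source4/dsdb/samdb/ldb_modules/repl_meta_data.c b/source4/dsdb/samdb/ldb_modules/repl_meta_data.c
index c1ea5ad90f88e7e3c7012ba9d96ac772dc6a030e..175a02d3ba78495996a81eedf83556fce1647a2d 100644 (file)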
@@ -7533,6 +7533,16 @@ static int replmd_allow_missing_target(struct ldb_module *module,
                                                  source_dn,
                                                  target_dn);
        if (is_in_same_nc) {
+               /*
+                * We allow the join.py code to point out that all
+                * replication is completed, so failing now would just
+                * trigger errors, rather than trigger a GET_TGT
+                */
+               int *finished_full_join_ptr =
+                       talloc_get_type(ldb_get_opaque(ldb,
+                                                      DSDB_FULL_JOIN_REPLICATION_COMPLETED_OPAQUE_NAME),
+                                       int);
+               bool finished_full_join = finished_full_join_ptr && *finished_full_join_ptr;
 
                /*
                 * if the target is already be up-to-date there's no point in
@@ -7540,7 +7550,8 @@ static int replmd_allow_missing_target(struct ldb_module *module,
                 * on a one-way link was deleted. We ignore the link rather
                 * than failing the replication cycle completely
                 */
-               if (dsdb_repl_flags & DSDB_REPL_FLAG_TARGETS_UPTODATE) {
+               if (finished_full_join
+                   || dsdb_repl_flags & DSDB_REPL_FLAG_TARGETS_UPTODATE) {
                        *ignore_link = true;
                        DBG_WARNING("%s is %s "
                                    "but up to date. Ignoring link from %s\n",