repl: Enforce that we have parent objects for all replicated objects
author Andrew Bartlett <abartlet@samba.org>
Wed, 9 Dec 2015 04:05:56 +0000 (17:05 +1300)
committer Andrew Bartlett <abartlet@samba.org>
Mon, 6 Jun 2016 14:36:22 +0000 (16:36 +0200)
Creating replicated objects without their parent object allows database corruption, as they can end up under
the wrong object.  When the parent is missing we need to retry the replication with the DRSUAPI_DRS_GET_ANC flag
set, so that we get the objects in tree order.

Signed-off-by: Andrew Bartlett <abartlet@samba.org>
Reviewed-by: Garming Sam <garming@catalyst.net.nz>
source4/dsdb/repl/drepl_out_helpers.c
source4/dsdb/repl/replicated_objects.c
source4/dsdb/samdb/ldb_modules/repl_meta_data.c
source4/dsdb/samdb/samdb.h

index 54f44c60f7eafd8416698557bd45643dce01ea05..6493c1ea49879f0f41f56d1cd3856e9b2d11b01b 100644 (file)
@@ -458,6 +458,10 @@ static void dreplsrv_op_pull_source_get_changes_trigger(struct tevent_req *req)
        replica_flags = rf1->replica_flags;
        highwatermark = rf1->highwatermark;
 
+       if (state->op->options & DRSUAPI_DRS_GET_ANC) {
+               replica_flags |= DRSUAPI_DRS_GET_ANC;
+       }
+
        if (partition->partial_replica) {
                status = dreplsrv_get_gc_partial_attribute_set(service, r, &pas);
                if (!NT_STATUS_IS_OK(status)) {
@@ -873,7 +877,27 @@ static void dreplsrv_op_pull_source_apply_changes_trigger(struct tevent_req *req
        talloc_free(objects);
 
        if (!W_ERROR_IS_OK(status)) {
-               nt_status = werror_to_ntstatus(WERR_BAD_NET_RESP);
+
+               /*
+                * If we failed to apply the records due to a missing
+                * parent, try again after asking for the parent
+                * records first.  Because we don't update the
+                * highwatermark, we start this part of the cycle
+                * again.
+                */
+               if (((state->op->options & DRSUAPI_DRS_GET_ANC) == 0)
+                   && W_ERROR_EQUAL(status, WERR_DS_DRA_MISSING_PARENT)) {
+                       state->op->options |= DRSUAPI_DRS_GET_ANC;
+                       DEBUG(4,("Missing parent object when we didn't set the DRSUAPI_DRS_GET_ANC flag, retrying\n"));
+                       dreplsrv_op_pull_source_get_changes_trigger(req);
+                       return;
+               } else if (((state->op->options & DRSUAPI_DRS_GET_ANC))
+                          && W_ERROR_EQUAL(status, WERR_DS_DRA_MISSING_PARENT)) {
+                       DEBUG(1,("Missing parent object despite setting DRSUAPI_DRS_GET_ANC flag\n"));
+                       nt_status = NT_STATUS_INVALID_NETWORK_RESPONSE;
+               } else {
+                       nt_status = werror_to_ntstatus(WERR_BAD_NET_RESP);
+               }
                DEBUG(0,("Failed to commit objects: %s/%s\n",
                          win_errstr(status), nt_errstr(nt_status)));
                tevent_req_nterror(req, nt_status);
index 44a766b03fe761af22509e4df29b1f71ea9b6380..33dd8211e349c04a76027509e3e30a2bf6493087 100644 (file)
@@ -348,15 +348,12 @@ WERROR dsdb_convert_object_ex(struct ldb_context *ldb,
                              TALLOC_CTX *mem_ctx,
                              struct dsdb_extended_replicated_object *out)
 {
-       NTSTATUS nt_status;
        WERROR status = WERR_OK;
        uint32_t i;
        struct ldb_message *msg;
        struct replPropertyMetaDataBlob *md;
        int instanceType;
        struct ldb_message_element *instanceType_e = NULL;
-       struct ldb_val guid_value;
-       struct ldb_val parent_guid_value;
        NTTIME whenChanged = 0;
        time_t whenChanged_t;
        const char *whenChanged_s;
@@ -615,23 +612,17 @@ WERROR dsdb_convert_object_ex(struct ldb_context *ldb,
        whenChanged_s = ldb_timestring(msg, whenChanged_t);
        W_ERROR_HAVE_NO_MEMORY(whenChanged_s);
 
-       nt_status = GUID_to_ndr_blob(&in->object.identifier->guid, msg, &guid_value);
-       if (!NT_STATUS_IS_OK(nt_status)) {
-               return ntstatus_to_werror(nt_status);
-       }
+       out->object_guid = in->object.identifier->guid;
 
-       if (in->parent_object_guid) {
-               nt_status = GUID_to_ndr_blob(in->parent_object_guid, msg, &parent_guid_value);
-               if (!NT_STATUS_IS_OK(nt_status)) {
-                       return ntstatus_to_werror(nt_status);
-               }
+       if (in->parent_object_guid == NULL) {
+               out->parent_guid = NULL;
        } else {
-               parent_guid_value = data_blob_null;
+               out->parent_guid = talloc(mem_ctx, struct GUID);
+               W_ERROR_HAVE_NO_MEMORY(out->parent_guid);
+               *out->parent_guid = *in->parent_object_guid;
        }
 
        out->msg                = msg;
-       out->guid_value         = guid_value;
-       out->parent_guid_value  = parent_guid_value;
        out->when_changed       = whenChanged_s;
        out->meta_data          = md;
        return WERR_OK;
@@ -844,6 +835,10 @@ WERROR dsdb_replicated_objects_commit(struct ldb_context *ldb,
                         ldb_errstring(ldb), ldb_strerror(ret)));
                ldb_transaction_cancel(ldb);
                TALLOC_FREE(tmp_ctx);
+
+               if (!W_ERROR_IS_OK(objects->error)) {
+                       return objects->error;
+               }
                return WERR_FOOBAR;
        }
        talloc_free(ext_res);
index b58a0074b86367613b1a216daf514d3815b9f19d..54a9572cfff845de47d67cf95a2bdfa53160202a 100644 (file)
@@ -91,6 +91,7 @@ struct replmd_replicated_request {
        struct dsdb_extended_replicated_objects *objs;
 
        struct ldb_message *search_msg;
+       struct GUID local_parent_guid;
 
        uint64_t seq_num;
        bool is_urgent;
@@ -3527,6 +3528,10 @@ static int replmd_replicated_request_werror(struct replmd_replicated_request *ar
 {
        int ret = LDB_ERR_OTHER;
        /* TODO: do some error mapping */
+
+       /* Let the caller know the full WERROR */
+       ar->objs->error = status;
+
        return ret;
 }
 
@@ -4014,7 +4019,9 @@ static int replmd_replicated_apply_add(struct replmd_replicated_request *ar)
                return replmd_replicated_request_error(ar, ret);
        }
 
-       ret = ldb_msg_add_value(msg, "objectGUID", &ar->objs->objects[ar->index_current].guid_value, NULL);
+       ret = dsdb_msg_add_guid(msg,
+                               &ar->objs->objects[ar->index_current].object_guid,
+                               "objectGUID");
        if (ret != LDB_SUCCESS) {
                return replmd_replicated_request_error(ar, ret);
        }
@@ -4154,12 +4161,14 @@ static int replmd_replicated_apply_search_for_parent_callback(struct ldb_request
                return ldb_module_done(ar->req, NULL, NULL,
                                        LDB_ERR_OPERATIONS_ERROR);
        }
-       if (ares->error != LDB_SUCCESS &&
-           ares->error != LDB_ERR_NO_SUCH_OBJECT) {
-               /*
-                * TODO: deal with the above error that the parent object doesn't exist
-                */
 
+       /*
+        * The error NO_SUCH_OBJECT is not expected, unless the search
+        * base is the partition DN, and that case doesn't happen here
+        * because then we wouldn't get a parent_guid in any
+        * case.
+        */
+       if (ares->error != LDB_SUCCESS) {
                return ldb_module_done(ar->req, ares->controls,
                                        ares->response, ares->error);
        }
@@ -4217,9 +4226,13 @@ static int replmd_replicated_apply_search_for_parent_callback(struct ldb_request
                        }
                        ar->objs->objects[ar->index_current].last_known_parent
                                = talloc_steal(ar->objs->objects[ar->index_current].msg, parent_msg->dn);
+
                } else {
-                       parent_dn = parent_msg->dn;
+                       parent_dn
+                               = talloc_steal(ar->objs->objects[ar->index_current].msg, parent_msg->dn);
+
                }
+               ar->objs->objects[ar->index_current].local_parent_dn = parent_dn;
 
                comp_num = ldb_dn_get_comp_num(msg->dn);
                if (comp_num > 1) {
@@ -4239,6 +4252,32 @@ static int replmd_replicated_apply_search_for_parent_callback(struct ldb_request
                break;
 
        case LDB_REPLY_DONE:
+
+               if (ar->objs->objects[ar->index_current].local_parent_dn == NULL) {
+                       struct GUID_txt_buf str_buf;
+                       if (ar->search_msg != NULL) {
+                               ldb_asprintf_errstring(ldb_module_get_ctx(ar->module),
+                                                      "No parent with GUID %s found for object locally known as %s",
+                                                      GUID_buf_string(ar->objs->objects[ar->index_current].parent_guid, &str_buf),
+                                                      ldb_dn_get_linearized(ar->search_msg->dn));
+                       } else {
+                               ldb_asprintf_errstring(ldb_module_get_ctx(ar->module),
+                                                      "No parent with GUID %s found for object remotely known as %s",
+                                                      GUID_buf_string(ar->objs->objects[ar->index_current].parent_guid, &str_buf),
+                                                      ldb_dn_get_linearized(ar->objs->objects[ar->index_current].msg->dn));
+                       }
+
+                       /*
+                        * This error code is really important, as it
+                        * is the flag back to the callers to retry
+                        * this with DRSUAPI_DRS_GET_ANC, and so get
+                        * the parent objects before the child
+                        * objects
+                        */
+                       return ldb_module_done(ar->req, NULL, NULL,
+                                              replmd_replicated_request_werror(ar, WERR_DS_DRA_MISSING_PARENT));
+               }
+
                if (ar->search_msg != NULL) {
                        ret = replmd_replicated_apply_merge(ar);
                } else {
@@ -4268,10 +4307,11 @@ static int replmd_replicated_apply_search_for_parent(struct replmd_replicated_re
        char *filter;
        struct ldb_request *search_req;
        static const char *attrs[] = {"isDeleted", NULL};
+       struct GUID_txt_buf guid_str_buf;
 
        ldb = ldb_module_get_ctx(ar->module);
 
-       if (!ar->objs->objects[ar->index_current].parent_guid_value.data) {
+       if (ar->objs->objects[ar->index_current].parent_guid == NULL) {
                if (ar->search_msg != NULL) {
                        return replmd_replicated_apply_merge(ar);
                } else {
@@ -4279,12 +4319,11 @@ static int replmd_replicated_apply_search_for_parent(struct replmd_replicated_re
                }
        }
 
-       tmp_str = ldb_binary_encode(ar, ar->objs->objects[ar->index_current].parent_guid_value);
-       if (!tmp_str) return replmd_replicated_request_werror(ar, WERR_NOMEM);
+       tmp_str = GUID_buf_string(ar->objs->objects[ar->index_current].parent_guid,
+                                 &guid_str_buf);
 
        filter = talloc_asprintf(ar, "(objectGUID=%s)", tmp_str);
        if (!filter) return replmd_replicated_request_werror(ar, WERR_NOMEM);
-       talloc_free(tmp_str);
 
        ret = ldb_build_search_req(&search_req,
                                   ldb,
@@ -4375,6 +4414,7 @@ static int replmd_replicated_apply_merge(struct replmd_replicated_request *ar)
        const struct ldb_val *omd_value;
        struct replPropertyMetaDataBlob nmd;
        struct ldb_val nmd_value;
+       struct GUID remote_parent_guid;
        unsigned int i;
        uint32_t j,ni=0;
        unsigned int removed_attrs = 0;
@@ -4386,6 +4426,7 @@ static int replmd_replicated_apply_merge(struct replmd_replicated_request *ar)
        bool take_remote_isDeleted = false;
        bool sd_updated = false;
        bool renamed = false;
+       NTSTATUS nt_status;
 
        ldb = ldb_module_get_ctx(ar->module);
        msg = ar->objs->objects[ar->index_current].msg;
@@ -4400,7 +4441,7 @@ static int replmd_replicated_apply_merge(struct replmd_replicated_request *ar)
                ndr_err = ndr_pull_struct_blob(omd_value, ar, &omd,
                                               (ndr_pull_flags_fn_t)ndr_pull_replPropertyMetaDataBlob);
                if (!NDR_ERR_CODE_IS_SUCCESS(ndr_err)) {
-                       NTSTATUS nt_status = ndr_map_error2ntstatus(ndr_err);
+                       nt_status = ndr_map_error2ntstatus(ndr_err);
                        return replmd_replicated_request_werror(ar, ntstatus_to_werror(nt_status));
                }
 
@@ -4414,7 +4455,27 @@ static int replmd_replicated_apply_merge(struct replmd_replicated_request *ar)
        remote_isDeleted = ldb_msg_find_attr_as_bool(msg,
                                                     "isDeleted", false);
 
-       if (strcmp(ldb_dn_get_linearized(msg->dn), ldb_dn_get_linearized(ar->search_msg->dn)) == 0) {
+       /*
+        * Fill in the remote_parent_guid with the GUID or an all-zero
+        * GUID.
+        */
+       if (ar->objs->objects[ar->index_current].parent_guid != NULL) {
+               remote_parent_guid = *ar->objs->objects[ar->index_current].parent_guid;
+       } else {
+               remote_parent_guid = GUID_zero();
+       }
+
+       /*
+        * To ensure we follow a complex rename chain around, we have
+        * to confirm that the DN is the same (mostly to confirm the
+        * RDN) and the parentGUID is the same.
+        *
+        * This ensures we keep things under the correct parent, which
+        * replmd_replicated_handle_rename() will do.
+        */
+
+       if (strcmp(ldb_dn_get_linearized(msg->dn), ldb_dn_get_linearized(ar->search_msg->dn)) == 0
+           && GUID_equal(&remote_parent_guid, &ar->local_parent_guid)) {
                ret = LDB_SUCCESS;
        } else {
                /*
@@ -4437,14 +4498,10 @@ static int replmd_replicated_apply_merge(struct replmd_replicated_request *ar)
         * need to rename the incoming record
         */
        if (ret == LDB_ERR_ENTRY_ALREADY_EXISTS) {
-               struct GUID guid;
-               NTSTATUS status;
                struct ldb_dn *new_dn;
-               status = GUID_from_ndr_blob(&ar->objs->objects[ar->index_current].guid_value, &guid);
-               /* This really, really can't fail */
-               SMB_ASSERT(NT_STATUS_IS_OK(status));
 
-               new_dn = replmd_conflict_dn(msg, msg->dn, &guid);
+               new_dn = replmd_conflict_dn(msg, msg->dn,
+                                           &ar->objs->objects[ar->index_current].object_guid);
                if (new_dn == NULL) {
                        ldb_asprintf_errstring(ldb_module_get_ctx(ar->module),
                                                                  "Failed to form conflict DN for %s\n",
@@ -4648,7 +4705,7 @@ static int replmd_replicated_apply_merge(struct replmd_replicated_request *ar)
        ndr_err = ndr_push_struct_blob(&nmd_value, msg, &nmd,
                                       (ndr_push_flags_fn_t)ndr_push_replPropertyMetaDataBlob);
        if (!NDR_ERR_CODE_IS_SUCCESS(ndr_err)) {
-               NTSTATUS nt_status = ndr_map_error2ntstatus(ndr_err);
+               nt_status = ndr_map_error2ntstatus(ndr_err);
                return replmd_replicated_request_werror(ar, ntstatus_to_werror(nt_status));
        }
 
@@ -4745,7 +4802,7 @@ static int replmd_replicated_apply_search_callback(struct ldb_request *req,
                const struct ldb_val *omd_value;
                struct replPropertyMetaDataBlob *rmd;
                struct ldb_message *msg;
-
+               ar->objs->objects[ar->index_current].local_parent_dn = NULL;
                ar->objs->objects[ar->index_current].last_known_parent = NULL;
 
                /*
@@ -4787,6 +4844,8 @@ static int replmd_replicated_apply_search_callback(struct ldb_request *req,
                        }
                }
 
+               ar->local_parent_guid = samdb_result_guid(ar->search_msg, "parentGUID");
+
                /*
                 * now we need to check for double renames. We could have a
                 * local rename pending which our replication partner hasn't
@@ -4811,9 +4870,13 @@ static int replmd_replicated_apply_search_callback(struct ldb_request *req,
 
                        /*
                         * This assignment ensures that the strcmp()
-                        * in replmd_replicated_apply_merge() avoids
-                        * the rename call
+                        * and GUID_equal() calls in
+                        * replmd_replicated_apply_merge() avoid the
+                        * rename call
                         */
+                       ar->objs->objects[ar->index_current].parent_guid =
+                               &ar->local_parent_guid;
+
                        msg->dn = ar->search_msg->dn;
                        ret = replmd_replicated_apply_merge(ar);
                }
@@ -4836,9 +4899,10 @@ static int replmd_replicated_apply_next(struct replmd_replicated_request *ar)
        char *tmp_str;
        char *filter;
        struct ldb_request *search_req;
-       static const char *attrs[] = { "*", "instanceType",
+       static const char *attrs[] = { "*", "parentGUID", "instanceType",
                                       "replPropertyMetaData", "nTSecurityDescriptor",
                                       NULL };
+       struct GUID_txt_buf guid_str_buf;
 
        if (ar->index_current >= ar->objs->num_objects) {
                /* done with it, go to next stage */
@@ -4849,12 +4913,11 @@ static int replmd_replicated_apply_next(struct replmd_replicated_request *ar)
        ar->search_msg = NULL;
        ar->isDeleted = false;
 
-       tmp_str = ldb_binary_encode(ar, ar->objs->objects[ar->index_current].guid_value);
-       if (!tmp_str) return replmd_replicated_request_werror(ar, WERR_NOMEM);
+       tmp_str = GUID_buf_string(&ar->objs->objects[ar->index_current].object_guid,
+                                 &guid_str_buf);
 
        filter = talloc_asprintf(ar, "(objectGUID=%s)", tmp_str);
        if (!filter) return replmd_replicated_request_werror(ar, WERR_NOMEM);
-       talloc_free(tmp_str);
 
        ret = ldb_build_search_req(&search_req,
                                   ldb,
index 0a1d90d8b408b6b84a574a82c8bea8c970d070a0..d4f854dfc641319ad7da9f38223856adddf94743 100644 (file)
@@ -161,13 +161,14 @@ struct dsdb_control_password_change {
 #define DSDB_EXTENDED_REPLICATED_OBJECTS_OID "1.3.6.1.4.1.7165.4.4.1"
 struct dsdb_extended_replicated_object {
        struct ldb_message *msg;
-       struct ldb_val guid_value;
-       struct ldb_val parent_guid_value;
+       struct GUID object_guid;
+       struct GUID *parent_guid;
        const char *when_changed;
        struct replPropertyMetaDataBlob *meta_data;
 
        /* Only used for internal processing in repl_meta_data */
        struct ldb_dn *last_known_parent;
+       struct ldb_dn *local_parent_dn;
 };
 
 struct dsdb_extended_replicated_objects {
@@ -191,6 +192,8 @@ struct dsdb_extended_replicated_objects {
 
        uint32_t linked_attributes_count;
        const struct drsuapi_DsReplicaLinkedAttribute *linked_attributes;
+
+       WERROR error;
 };
 
 #define DSDB_EXTENDED_CREATE_PARTITION_OID "1.3.6.1.4.1.7165.4.4.4"