2 * Copyright (C) 2004-2010 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1999-2003 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: rbtdb.c,v 1.292.8.9 2010/05/10 01:41:11 marka Exp $ */
23 * Principal Author: Bob Halley
30 #include <isc/event.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
45 #include <dns/acache.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
52 #include <dns/masterdump.h>
54 #include <dns/nsec3.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
65 #include <dns/zonekey.h>
67 #ifdef DNS_RBTDB_VERSION64
/*
 * Implementation magic number.  Two alternative values are defined:
 * 'RBD8' under DNS_RBTDB_VERSION64 and 'RBD4' as the other choice.
 */
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '8')
76 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
80 * Note that "impmagic" is not the first four bytes of the struct, so
81 * ISC_MAGIC_VALID cannot be used.
/*
 * VALID_RBTDB(rbtdb) is true when 'rbtdb' is non-NULL and its
 * common.impmagic field equals RBTDB_MAGIC.  The argument is evaluated
 * twice.
 */
83 #define VALID_RBTDB(rbtdb) ((rbtdb) != NULL && \
84 (rbtdb)->common.impmagic == RBTDB_MAGIC)
/*
 * Width of a version serial number: 64 bits under DNS_RBTDB_VERSION64,
 * with a 32-bit definition as the alternative.
 */
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t rbtdb_serial_t;
89 * Make casting easier in symbolic debuggers by using different names
90 * for the 64 bit version.
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
96 typedef isc_uint32_t rbtdb_serial_t;
/*
 * Packed rdataset type.  The RBTDB_RDATATYPE_BASE/EXT/VALUE macros split
 * it into (and build it from) a low 16-bit half and a high 16-bit half.
 */
99 typedef isc_uint32_t rbtdb_rdatatype_t;
/*
 * Accessors for the packed rbtdb_rdatatype_t representation: the low
 * 16 bits hold the base type and the high 16 bits hold the extended type.
 *
 * RBTDB_RDATATYPE_BASE(type)  - extract the low half as a dns_rdatatype_t.
 * RBTDB_RDATATYPE_EXT(type)   - extract the high half as a dns_rdatatype_t.
 * RBTDB_RDATATYPE_VALUE(b, e) - combine base 'b' and extended type 'e'.
 *
 * RBTDB_RDATATYPE_VALUE converts each operand to rbtdb_rdatatype_t before
 * shifting or OR-ing.  Shifting the integer-promoted (signed) operand by 16
 * would be undefined for extended types of 0x8000 and above, because the
 * result does not fit in an int.
 */
#define RBTDB_RDATATYPE_BASE(type)	((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)	((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
	(((rbtdb_rdatatype_t)(e) << 16) | (rbtdb_rdatatype_t)(b))

/*
 * Pre-built packed values: RRSIG as the base type combined with the type
 * named in the macro (NSEC, NSEC3, NS, CNAME, DNAME), plus a value with
 * base 0 and extended type ANY.
 */
#define RBTDB_RDATATYPE_SIGNSEC \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
#define RBTDB_RDATATYPE_SIGNSEC3 \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
#define RBTDB_RDATATYPE_SIGNS \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
#define RBTDB_RDATATYPE_SIGCNAME \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
#define RBTDB_RDATATYPE_SIGDNAME \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
#define RBTDB_RDATATYPE_NCACHEANY \
		RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
119 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120 * Using rwlock is effective with regard to lookup performance only when
121 * it is implemented in an efficient way.
122 * Otherwise, it is generally wise to stick to the simple locking since rwlock
123 * would require more memory or can even make lookups slower due to its own
124 * overhead (when it internally calls mutex locks).
/*
 * DNS_RBTDB_USERWLOCK is defined as 1 when ISC_RWLOCK_USEATOMIC is
 * defined; the other definition below sets it to 0.
 */
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
129 #define DNS_RBTDB_USERWLOCK 0
/*
 * Database lock wrappers.  This first set maps onto isc_rwlock and passes
 * the requested lock type 't' through to RWLOCK/RWUNLOCK.
 */
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l) isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t) RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t) RWUNLOCK((l), (t))
/*
 * Mutex-based set: the 't' argument is accepted so call sites look the
 * same, but it does not appear in the expansion.
 */
138 #define RBTDB_INITLOCK(l) isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l) DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t) LOCK(l)
141 #define RBTDB_UNLOCK(l, t) UNLOCK(l)
145 * Since node locking is sensitive to both performance and memory footprint,
146 * we need some trick here. If we have both high-performance rwlock and
147 * high performance and small-memory reference counters, we use rwlock for
148 * node lock and isc_refcount for node references. In this case, we don't have
149 * to protect the access to the counters by locks.
150 * Otherwise, we simply use ordinary mutex lock for node locking, and use
151 * simple integers as reference counters which is protected by the lock.
152 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153 * NODE_UNLOCK. In some other cases, however, we need to protect reference
154 * counters first and then protect other parts of a node as read-only data.
155 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156 * provided for these special cases. When we can use the efficient backend
157 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159 * section including the access to the reference counter.
160 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161 * section is also protected by NODE_STRONGLOCK().
/*
 * Node lock flavour used when both ISC_RWLOCK_USEATOMIC and
 * DNS_RBT_USEISCREFCOUNT are defined: nodelock_t is an isc_rwlock_t.
 */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
164 typedef isc_rwlock_t nodelock_t;
166 #define NODE_INITLOCK(l) isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t) RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t) RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l) isc_rwlock_tryupgrade(l)
/*
 * In this flavour the STRONG variants expand to nothing and the WEAK
 * variants forward to NODE_LOCK/NODE_UNLOCK.
 */
172 #define NODE_STRONGLOCK(l) ((void)0)
173 #define NODE_STRONGUNLOCK(l) ((void)0)
174 #define NODE_WEAKLOCK(l, t) NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t) NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l) isc_rwlock_downgrade(l)
/*
 * Alternative flavour: nodelock_t is an isc_mutex_t.  The lock type 't'
 * is ignored and NODE_TRYUPGRADE always evaluates to ISC_R_SUCCESS.
 */
178 typedef isc_mutex_t nodelock_t;
180 #define NODE_INITLOCK(l) isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l) DESTROYLOCK(l)
182 #define NODE_LOCK(l, t) LOCK(l)
183 #define NODE_UNLOCK(l, t) UNLOCK(l)
184 #define NODE_TRYUPGRADE(l) ISC_R_SUCCESS
/*
 * Here the roles are reversed: the STRONG variants take and release the
 * mutex, and the WEAK variants expand to nothing.
 */
186 #define NODE_STRONGLOCK(l) LOCK(l)
187 #define NODE_STRONGUNLOCK(l) UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t) ((void)0)
189 #define NODE_WEAKUNLOCK(l, t) ((void)0)
190 #define NODE_WEAKDOWNGRADE(l) ((void)0)
194 * Whether to rate-limit updating the LRU to avoid possible thread contention.
195 * Our performance measurement has shown the cost is marginal, so it's defined
196 * to be 0 by default either with or without threads.
/* Default to 0 unless the build already defines DNS_RBTDB_LIMITLRUUPDATE. */
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
203 * Allow clients with a virtual time of up to 5 minutes in the past to see
204 * records that would otherwise have expired.
/* 300 corresponds to the five-minute allowance expressed in seconds. */
206 #define RBTDB_VIRTUAL 300
212 dns_rdatatype_t type;
/* Short name for struct acachectl, used for the additional_auth/additional_glue arrays. */
215 typedef struct acachectl acachectl_t;
/*
 * Header describing one rdataset stored on a node.
 *
 * Headers are chained through 'next' and 'down'.  The 'attributes' word
 * holds RDATASET_ATTR_* bits.  'link' ties the header into one of the
 * per-bucket lists in rbtdb->rdatasets, and 'heap_index' is the header's
 * position in a per-bucket heap (0 is treated as "not in a heap" by
 * set_ttl() and free_rdataset()).
 */
217 typedef struct rdatasetheader {
219 * Locked by the owning node's lock.
221 rbtdb_serial_t serial;
223 rbtdb_rdatatype_t type;
224 isc_uint16_t attributes;
/* Optional records released through free_noqname() when the header is freed. */
226 struct noqname *noqname;
227 struct noqname *closest;
229 * We don't use the LIST macros, because the LIST structure has
230 * both head and tail pointers, and is doubly linked.
233 struct rdatasetheader *next;
235 * If this is the top header for an rdataset, 'next' points
236 * to the top header for the next rdataset (i.e., the next type).
237 * Otherwise, it points up to the header whose down pointer points
241 struct rdatasetheader *down;
243 * Points to the header for the next older version of
249 * Monotonously increased every time this rdataset is bound so that
250 * it is used as the base of the starting point in DNS responses
251 * when the "cyclic" rrset-order is required. Since the ordering
252 * should not be so crucial, no lock is set for the counter for
253 * performance reasons.
/* Per-rdata arrays released through free_acachearray(). */
256 acachectl_t *additional_auth;
257 acachectl_t *additional_glue;
260 isc_stdtime_t last_used;
261 ISC_LINK(struct rdatasetheader) link;
/* Written by set_index(); passed to isc_heap_increased/decreased/delete. */
263 unsigned int heap_index;
265 * Used for TTL-based cache cleaning.
/* Ordering key compared by resign_sooner(). */
267 isc_stdtime_t resign;
/* List head types for header lists and for node lists (e.g. rbtdb->deadnodes). */
270 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
/*
 * Bit flags stored in rdatasetheader 'attributes' (a 16-bit field).
 * Most have a matching test macro (EXISTS/NONEXISTENT, IGNORE, RETAIN,
 * NXDOMAIN, RESIGN, OPTOUT); STALE and STATCOUNT are tested directly
 * where needed.
 */
273 #define RDATASET_ATTR_NONEXISTENT 0x0001
274 #define RDATASET_ATTR_STALE 0x0002
275 #define RDATASET_ATTR_IGNORE 0x0004
276 #define RDATASET_ATTR_RETAIN 0x0008
277 #define RDATASET_ATTR_NXDOMAIN 0x0010
278 #define RDATASET_ATTR_RESIGN 0x0020
279 #define RDATASET_ATTR_STATCOUNT 0x0040
280 #define RDATASET_ATTR_OPTOUT 0x0080
/*
 * Callback argument for additional-cache entries, followed by the
 * entry/cbarg pair that free_acachearray() expects to be NULL for every
 * array slot before the array is released.
 * NOTE(review): the lines separating the two structures are not shown
 * here; confirm which members belong to which structure.
 */
282 typedef struct acache_cbarg {
283 dns_rdatasetadditional_t type;
287 rdatasetheader_t *header;
291 dns_acacheentry_t *entry;
292 acache_cbarg_t *cbarg;
297 * When the cache will pre-expire data (due to memory low or other
298 * situations) before the rdataset's TTL has expired, it MUST
299 * respect the RETAIN bit and not expire the data until its TTL is
303 #undef IGNORE /* WIN32 winbase.h defines this. */
/*
 * Attribute tests on a rdatasetheader pointer.  EXISTS() is true when
 * RDATASET_ATTR_NONEXISTENT is clear; every other macro is true when the
 * correspondingly named RDATASET_ATTR_* bit is set.
 */
305 #define EXISTS(header) \
306 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
307 #define NONEXISTENT(header) \
308 (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
309 #define IGNORE(header) \
310 (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
311 #define RETAIN(header) \
312 (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
313 #define NXDOMAIN(header) \
314 (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
315 #define RESIGN(header) \
316 (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
317 #define OPTOUT(header) \
318 (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
/* Default number of node lock buckets. */
320 #define DEFAULT_NODE_LOCK_COUNT 7 /*%< Should be prime. */
323 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
324 * There is a tradeoff issue about configuring this value: if this is too
325 * small, it may cause heavier contention between threads; if this is too large,
326 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
327 * The default value should work well for most environments, but this can
328 * also be configurable at compilation time via the
329 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable. This value must be larger than
330 * 1 due to the assumption of overmem_purge().
/*
 * A build-supplied DNS_RBTDB_CACHE_NODE_LOCK_COUNT is rejected at compile
 * time when it is <= 1; otherwise it becomes the cache default.  The
 * other definition uses 16.
 */
332 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
333 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
334 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
336 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT 16
340 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
/*
 * Members of a lock bucket; presumably these belong to rbtdb_nodelock_t,
 * since node_locks[i].references and node_locks[i].exiting are used by
 * maybe_free_rbtdb() and free_rbtdb() (declaration header not shown).
 */
344 /* Protected in the refcount routines. */
345 isc_refcount_t references;
346 /* Locked by lock. */
347 isc_boolean_t exiting;
/*
 * Record of a node touched by a writer version.  add_changed() allocates
 * one, stores the node pointer, and appends it to the version's
 * changed_list through 'link'.
 */
350 typedef struct rbtdb_changed {
351 dns_rbtnode_t * node;
353 ISC_LINK(struct rbtdb_changed) link;
/* List head type used for rbtdb_version.changed_list. */
356 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
/*
 * One version of the database.  allocate_version() fills in serial,
 * references, writer, commit_ok, the two lists and the link; newversion()
 * then copies the security and NSEC3 settings from the current version.
 */
364 typedef struct rbtdb_version {
366 rbtdb_serial_t serial;
368 * Protected in the refcount routines.
369 * XXXJT: should we change the lock policy based on the refcount
372 isc_refcount_t references;
373 /* Locked by database lock. */
374 isc_boolean_t writer;
375 isc_boolean_t commit_ok;
376 rbtdb_changedlist_t changed_list;
377 rdatasetheaderlist_t resigned_list;
378 ISC_LINK(struct rbtdb_version) link;
379 dns_db_secure_t secure;
380 isc_boolean_t havensec3;
381 /* NSEC3 parameters */
384 isc_uint16_t iterations;
385 isc_uint8_t salt_length;
386 unsigned char salt[DNS_NSEC3_SALTSIZE];
/* List head type used for dns_rbtdb_t.open_versions. */
389 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
/*
 * Members of the database object (declaration header not shown here).
 * The type of the database lock depends on DNS_RBTDB_USERWLOCK.
 */
394 #if DNS_RBTDB_USERWLOCK
399 isc_rwlock_t tree_lock;
/*
 * node_lock_count is the element count of node_locks and also of the
 * rdatasets, deadnodes and heaps arrays: free_rbtdb() sizes all four
 * releases with it.
 */
400 unsigned int node_lock_count;
401 rbtdb_nodelock_t * node_locks;
402 dns_rbtnode_t * origin_node;
403 dns_stats_t * rrsetstats; /* cache DB only */
404 /* Locked by lock. */
406 isc_refcount_t references;
/* Holds RBTDB_ATTR_* bits. */
407 unsigned int attributes;
408 rbtdb_serial_t current_serial;
409 rbtdb_serial_t least_serial;
410 rbtdb_serial_t next_serial;
411 rbtdb_version_t * current_version;
412 rbtdb_version_t * future_version;
413 rbtdb_versionlist_t open_versions;
414 isc_boolean_t overmem;
/* Node references released by maybe_free_rbtdb() when non-NULL. */
416 dns_dbnode_t *soanode;
417 dns_dbnode_t *nsnode;
420 * This is a linked list used to implement the LRU cache. There will
421 * be node_lock_count linked lists here. Nodes in bucket 1 will be
422 * placed on the linked list rdatasets[1].
424 rdatasetheaderlist_t *rdatasets;
427 * Temporary storage for stale cache nodes and dynamically deleted
428 * nodes that await being cleaned up.
430 rbtnodelist_t *deadnodes;
433 * Heaps. Each of these is used for TTL based expiry.
437 /* Locked by tree_lock. */
/* Passed to dns_rbt_destroy2() by free_rbtdb() and tuned by adjust_quantum(). */
443 unsigned int quantum;
/* Bit values for the 'attributes' member above. */
446 #define RBTDB_ATTR_LOADED 0x01
447 #define RBTDB_ATTR_LOADING 0x02
/*
 * Members of a lookup-state structure (declaration header not shown).
 * NOTE(review): judging by the names, this carries the version and serial
 * being searched, caller options, the tree chain, and zone-cut
 * bookkeeping; confirm against the code that fills it in.
 */
454 rbtdb_version_t * rbtversion;
455 rbtdb_serial_t serial;
456 unsigned int options;
457 dns_rbtnodechain_t chain;
458 isc_boolean_t copy_name;
459 isc_boolean_t need_cleanup;
461 dns_rbtnode_t * zonecut;
462 rdatasetheader_t * zonecut_rdataset;
463 rdatasetheader_t * zonecut_sigrdataset;
464 dns_fixedname_t zonecut_name;
/*
 * Forward declarations of file-local functions.  The rdataset_* entries
 * include those installed in the rdataset_methods table; the remainder
 * are header/cache maintenance helpers.
 */
476 static void rdataset_disassociate(dns_rdataset_t *rdataset);
477 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
478 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
479 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
480 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
481 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
482 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
485 dns_rdataset_t *negsig);
486 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
489 dns_rdataset_t *negsig);
490 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
491 dns_rdatasetadditional_t type,
492 dns_rdatatype_t qtype,
493 dns_acache_t *acache,
496 dns_dbversion_t **versionp,
497 dns_dbnode_t **nodep,
501 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
502 dns_rdatasetadditional_t type,
503 dns_rdatatype_t qtype,
504 dns_acache_t *acache,
507 dns_dbversion_t *version,
510 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
511 dns_rdataset_t *rdataset,
512 dns_rdatasetadditional_t type,
513 dns_rdatatype_t qtype);
514 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
516 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
518 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
519 isc_boolean_t tree_locked);
520 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
521 isc_stdtime_t now, isc_boolean_t tree_locked);
522 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
523 rdatasetheader_t *newheader);
524 static void prune_tree(isc_task_t *task, isc_event_t *event);
525 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
526 static void rdataset_expire(dns_rdataset_t *rdataset);
/*
 * Method table for rdatasets backed by this database; entries are
 * positional, so their order must match dns_rdatasetmethods_t.
 */
528 static dns_rdatasetmethods_t rdataset_methods = {
529 rdataset_disassociate,
539 rdataset_getadditional,
540 rdataset_setadditional,
541 rdataset_putadditional,
/* Forward declarations for the rdataset iterator implementation. */
546 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
547 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
548 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
549 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
550 dns_rdataset_t *rdataset);
/* Positional method table for the rdataset iterator. */
552 static dns_rdatasetitermethods_t rdatasetiter_methods = {
553 rdatasetiter_destroy,
/*
 * Rdataset iterator state: the generic iterator comes first, followed by
 * a pointer to the header the iterator is currently positioned on.
 */
559 typedef struct rbtdb_rdatasetiter {
560 dns_rdatasetiter_t common;
561 rdatasetheader_t * current;
562 } rbtdb_rdatasetiter_t;
/* Forward declarations for the database (node) iterator implementation. */
564 static void dbiterator_destroy(dns_dbiterator_t **iteratorp);
565 static isc_result_t dbiterator_first(dns_dbiterator_t *iterator);
566 static isc_result_t dbiterator_last(dns_dbiterator_t *iterator);
567 static isc_result_t dbiterator_seek(dns_dbiterator_t *iterator,
569 static isc_result_t dbiterator_prev(dns_dbiterator_t *iterator);
570 static isc_result_t dbiterator_next(dns_dbiterator_t *iterator);
571 static isc_result_t dbiterator_current(dns_dbiterator_t *iterator,
572 dns_dbnode_t **nodep,
574 static isc_result_t dbiterator_pause(dns_dbiterator_t *iterator);
575 static isc_result_t dbiterator_origin(dns_dbiterator_t *iterator,
/* Method table for the database iterator. */
578 static dns_dbiteratormethods_t dbiterator_methods = {
/* Capacity of the iterator's 'deletions' array. */
590 #define DELETION_BATCH_MAX 64
593 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
/*
 * Database iterator state.  Two node chains are kept ('chain' and
 * 'nsec3chain') and 'current' points at a chain; 'nsec3only' and
 * 'nonsec3' are boolean mode flags.
 */
595 typedef struct rbtdb_dbiterator {
596 dns_dbiterator_t common;
597 isc_boolean_t paused;
598 isc_boolean_t new_origin;
599 isc_rwlocktype_t tree_locked;
601 dns_fixedname_t name;
602 dns_fixedname_t origin;
603 dns_rbtnodechain_t chain;
604 dns_rbtnodechain_t nsec3chain;
605 dns_rbtnodechain_t *current;
607 dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
609 isc_boolean_t nsec3only;
610 isc_boolean_t nonsec3;
611 } rbtdb_dbiterator_t;
/*
 * Database kind tests: true when the DNS_DBATTR_STUB or DNS_DBATTR_CACHE
 * bit, respectively, is set in rbtdb->common.attributes.
 */
614 #define IS_STUB(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
615 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
/* Forward declarations needed by the teardown and version code below. */
617 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
619 static void overmem(dns_db_t *db, isc_boolean_t overmem);
621 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
625 * 'init_count' is used to initialize 'newheader->count', which in turn
626 * is used to determine where in the cycle rrset-order cyclic starts.
627 * We don't lock this as we don't care about simultaneous updates.
630 * Both init_count and header->count can be ISC_UINT32_MAX.
631 * The count on the returned rdataset however can't be as
632 * that indicates that the database does not implement cyclic
/* File-scope counter; static storage, so it starts at 0. */
635 static unsigned int init_count;
640 * If a routine is going to lock more than one lock in this module, then
641 * the locking must be done in the following order:
645 * Node Lock (Only one from the set may be locked at one time by
650 * Failure to follow this hierarchy can result in deadlock.
656 * For zone databases the node for the origin of the zone MUST NOT be deleted.
/*
 * Take an additional reference on the database behind 'source'.
 *
 * 'source' must be a valid rbtdb (checked with REQUIRE).  The reference
 * count is incremented with NULL passed as the second argument.
 */
665 attach(dns_db_t *source, dns_db_t **targetp) {
666 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
668 REQUIRE(VALID_RBTDB(rbtdb));
670 isc_refcount_increment(&rbtdb->references, NULL);
/*
 * Task event handler that continues tearing down a database.
 *
 * The rbtdb is taken from event->ev_arg and passed to free_rbtdb() with
 * logging enabled; the event itself is handed on as well.
 */
676 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
677 dns_rbtdb_t *rbtdb = event->ev_arg;
681 free_rbtdb(rbtdb, ISC_TRUE, event);
/*
 * Adjust the rrset statistics counter that corresponds to 'header'.
 *
 * The counter key is built with DNS_RDATASTATSTYPE_VALUE() from a base
 * type and attribute bits:
 *  - an NXDOMAIN header sets DNS_RDATASTATSTYPE_ATTR_NXDOMAIN (base 0);
 *  - a header whose base type is 0 sets DNS_RDATASTATSTYPE_ATTR_NXRRSET
 *    and uses the extended half of header->type as the base;
 *  - the remaining assignment uses the base half of header->type.
 * The counter on rbtdb->rrsetstats is then incremented or decremented,
 * presumably as selected by 'increment'.
 *
 * Only cache databases are accepted (enforced with INSIST).
 */
685 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
686 isc_boolean_t increment)
688 dns_rdatastatstype_t statattributes = 0;
689 dns_rdatastatstype_t base = 0;
690 dns_rdatastatstype_t type;
692 /* At the moment we count statistics only for cache DB */
693 INSIST(IS_CACHE(rbtdb));
695 if (NXDOMAIN(header))
696 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
697 else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
698 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
699 base = RBTDB_RDATATYPE_EXT(header->type);
701 base = RBTDB_RDATATYPE_BASE(header->type);
703 type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
705 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
707 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
/*
 * Store 'newttl' in header->rdh_ttl and keep the TTL heap consistent.
 *
 * The old TTL is saved before it is overwritten.  The heap belonging to
 * the header's lock bucket (header->node->locknum) is then notified with
 * isc_heap_increased() or isc_heap_decreased().  The tests below (not a
 * cache, heap_index == 0, unchanged TTL, missing heaps array or bucket
 * heap) all come before that notification.
 */
711 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
716 oldttl = header->rdh_ttl;
717 header->rdh_ttl = newttl;
719 if (!IS_CACHE(rbtdb))
723 * It's possible the rbtdb is not a cache. If this is the case,
724 * we will not have a heap, and we move on. If we do, though,
725 * we might need to adjust things.
727 if (header->heap_index == 0 || newttl == oldttl)
/* Heaps are indexed by the lock bucket number of the owning node. */
729 idx = header->node->locknum;
730 if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
732 heap = rbtdb->heaps[idx];
735 isc_heap_increased(heap, header->heap_index);
737 isc_heap_decreased(heap, header->heap_index);
741 * These functions allow the heap code to rank the priority of each
742 * element. Each returns ISC_TRUE if v1 happens "sooner" than v2.
/*
 * Heap comparison on TTL: both arguments are rdatasetheader_t pointers
 * and the test is h1->rdh_ttl < h2->rdh_ttl.
 */
745 ttl_sooner(void *v1, void *v2) {
746 rdatasetheader_t *h1 = v1;
747 rdatasetheader_t *h2 = v2;
749 if (h1->rdh_ttl < h2->rdh_ttl)
/*
 * Heap comparison on re-signing time: both arguments are
 * rdatasetheader_t pointers and the test is h1->resign < h2->resign.
 */
755 resign_sooner(void *v1, void *v2) {
756 rdatasetheader_t *h1 = v1;
757 rdatasetheader_t *h2 = v2;
759 if (h1->resign < h2->resign)
765 * This function sets the heap index into the header.
/*
 * Record heap position 'index' in the header 'what' (an
 * rdatasetheader_t passed as void *).
 */
768 set_index(void *what, unsigned int index) {
769 rdatasetheader_t *h = what;
771 h->heap_index = index;
775 * Work out how many nodes can be deleted in the time between two
776 * requests to the nameserver. Smooth the resulting number and use it
777 * as an estimate for the number of nodes to be deleted in the next
/*
 * Compute a new deletion quantum from the previous one ('old') and the
 * time elapsed since 'start'.
 *
 * The per-request interval is 1000000 / pps microseconds, with pps taken
 * from dns_pps.  The raw estimate is old * interval / usecs, which is
 * then smoothed as (new + 3 * old) / 4.  The result is logged at debug
 * level 1.
 */
781 adjust_quantum(unsigned int old, isc_time_t *start) {
782 unsigned int pps = dns_pps; /* packets per second */
783 unsigned int interval;
792 interval = 1000000 / pps; /* interval in usec */
795 usecs = isc_time_microdiff(&end, start);
798 * We were unable to measure the amount of time taken.
799 * Double the nodes deleted next time.
/* Scale the old quantum by interval / elapsed time. */
806 new = old * interval;
807 new /= (unsigned int)usecs;
/* Weighted average: one part new estimate, three parts old value. */
814 new = (new + old * 3) / 4;
816 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
817 ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
/*
 * Release everything owned by 'rbtdb', then the rbtdb itself.
 *
 * Visible steps, in order:
 *  1. For class-IN cache databases, call overmem() with (isc_boolean_t)-1.
 *  2. Drop and free the current version (no future version may exist).
 *  3. Unlink every node left on the per-bucket deadnodes lists.
 *  4. Destroy the trees (tree, then nsec, then nsec3) with
 *     dns_rbt_destroy2(), limited by rbtdb->quantum.  On ISC_R_QUOTA the
 *     quantum is re-tuned, a DNS_EVENT_FREESTORAGE event is allocated and
 *     sent to rbtdb->task.
 *  5. Log completion, free the origin name, destroy the node locks, and
 *     release the rdatasets/deadnodes/heaps arrays, statistics, locks,
 *     task reference and finally the structure.
 *
 * 'event' is freed with isc_event_free() further down; 'log' and the
 * exact conditions around several steps are on lines not shown here.
 */
823 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
825 isc_ondestroy_t ondest;
827 char buf[DNS_NAME_FORMATSIZE];
831 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
832 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
/* Preconditions: open versions imply a current version; no writer pending. */
834 REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
835 REQUIRE(rbtdb->future_version == NULL);
837 if (rbtdb->current_version != NULL) {
840 isc_refcount_decrement(&rbtdb->current_version->references,
843 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
844 isc_refcount_destroy(&rbtdb->current_version->references);
845 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
846 sizeof(rbtdb_version_t));
850 * We assume the number of remaining dead nodes is reasonably small;
851 * the overhead of unlinking all nodes here should be negligible.
853 for (i = 0; i < rbtdb->node_lock_count; i++) {
856 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
857 while (node != NULL) {
858 ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
859 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
/* With a task available destruction starts at a quantum of 100; without one it is 0. */
864 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
868 * pick the next tree to (start to) destroy
870 treep = &rbtdb->tree;
871 if (*treep == NULL) {
872 treep = &rbtdb->nsec;
873 if (*treep == NULL) {
874 treep = &rbtdb->nsec3;
876 * we're finished after clear cutting
/* Time the destruction pass so adjust_quantum() can tune the next one. */
883 isc_time_now(&start);
884 result = dns_rbt_destroy2(treep, rbtdb->quantum);
885 if (result == ISC_R_QUOTA) {
886 INSIST(rbtdb->task != NULL);
887 if (rbtdb->quantum != 0)
888 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
891 event = isc_event_allocate(rbtdb->common.mctx,
893 DNS_EVENT_FREESTORAGE,
896 sizeof(isc_event_t));
899 isc_task_send(rbtdb->task, &event);
902 INSIST(result == ISC_R_SUCCESS && *treep == NULL);
906 isc_event_free(&event);
/* Format the origin for the log message; "<UNKNOWN>" is the fallback text. */
908 if (dns_name_dynamic(&rbtdb->common.origin))
909 dns_name_format(&rbtdb->common.origin, buf,
912 strcpy(buf, "<UNKNOWN>");
913 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
914 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
915 "done free_rbtdb(%s)", buf);
917 if (dns_name_dynamic(&rbtdb->common.origin))
918 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
919 for (i = 0; i < rbtdb->node_lock_count; i++) {
920 isc_refcount_destroy(&rbtdb->node_locks[i].references);
921 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
925 * Clean up LRU / re-signing order lists.
927 if (rbtdb->rdatasets != NULL) {
928 for (i = 0; i < rbtdb->node_lock_count; i++)
929 INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
930 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
931 rbtdb->node_lock_count *
932 sizeof(rdatasetheaderlist_t));
935 * Clean up dead node buckets.
937 if (rbtdb->deadnodes != NULL) {
938 for (i = 0; i < rbtdb->node_lock_count; i++)
939 INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
940 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
941 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
944 * Clean up heap objects.
946 if (rbtdb->heaps != NULL) {
947 for (i = 0; i < rbtdb->node_lock_count; i++)
948 isc_heap_destroy(&rbtdb->heaps[i]);
949 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
950 rbtdb->node_lock_count *
951 sizeof(isc_heap_t *));
954 if (rbtdb->rrsetstats != NULL)
955 dns_stats_detach(&rbtdb->rrsetstats);
957 isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
958 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
959 isc_rwlock_destroy(&rbtdb->tree_lock);
960 isc_refcount_destroy(&rbtdb->references);
961 if (rbtdb->task != NULL)
962 isc_task_detach(&rbtdb->task);
964 RBTDB_DESTROYLOCK(&rbtdb->lock);
/* Invalidate both magic numbers before the memory is returned. */
965 rbtdb->common.magic = 0;
966 rbtdb->common.impmagic = 0;
/* Copy ondest out first: it is needed after rbtdb has been freed. */
967 ondest = rbtdb->common.ondest;
968 isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
969 isc_ondestroy_notify(&ondest, rbtdb);
/*
 * Free 'rbtdb' if nothing is using it any more.
 *
 * The cached SOA and NS node references are released first.  Every lock
 * bucket is then marked as exiting under its node lock and its reference
 * count is examined.  Under the database lock, 'inactive' is subtracted
 * from rbtdb->active; when that reaches 0 the database is freed via
 * free_rbtdb() after a debug log message.
 */
973 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
974 isc_boolean_t want_free = ISC_FALSE;
976 unsigned int inactive = 0;
978 /* XXX check for open versions here */
980 if (rbtdb->soanode != NULL)
981 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
982 if (rbtdb->nsnode != NULL)
983 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
986 * Even though there are no external direct references, there still
987 * may be nodes in use.
989 for (i = 0; i < rbtdb->node_lock_count; i++) {
990 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
991 rbtdb->node_locks[i].exiting = ISC_TRUE;
992 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
/* The reference count is read after the node lock has been released. */
993 if (isc_refcount_current(&rbtdb->node_locks[i].references)
1000 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1001 rbtdb->active -= inactive;
1002 if (rbtdb->active == 0)
1003 want_free = ISC_TRUE;
1004 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1006 char buf[DNS_NAME_FORMATSIZE];
1007 if (dns_name_dynamic(&rbtdb->common.origin))
1008 dns_name_format(&rbtdb->common.origin, buf,
1011 strcpy(buf, "<UNKNOWN>");
1012 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1013 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1014 "calling free_rbtdb(%s)", buf);
1015 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * Drop one reference on the database '*dbp'.
 *
 * The remaining count is captured in 'refs' and maybe_free_rbtdb() is
 * called, presumably only once the count has dropped to zero (the test
 * is not shown here).
 */
1021 detach(dns_db_t **dbp) {
1022 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1025 REQUIRE(VALID_RBTDB(rbtdb));
1027 isc_refcount_decrement(&rbtdb->references, &refs);
1030 maybe_free_rbtdb(rbtdb);
/*
 * Return a referenced handle to the current version in '*versionp'.
 *
 * The database lock is held in read mode while current_version is read
 * and its reference count is incremented, so the version cannot be
 * replaced between the two steps.
 */
1036 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1037 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1038 rbtdb_version_t *version;
1041 REQUIRE(VALID_RBTDB(rbtdb));
1043 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1044 version = rbtdb->current_version;
1045 isc_refcount_increment(&version->references, &refs);
1046 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1048 *versionp = (dns_dbversion_t *)version;
/*
 * Allocate and initialise a version object from 'mctx'.
 *
 * 'serial', 'references' (the initial reference count) and 'writer' are
 * stored as given; commit_ok starts as ISC_FALSE and both lists and the
 * link are initialised.  If the reference counter cannot be initialised
 * the allocation is returned to 'mctx'.  Allocation failure is tested
 * for; callers (see newversion()) treat a NULL result as out of memory.
 */
1051 static inline rbtdb_version_t *
1052 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1053 unsigned int references, isc_boolean_t writer)
1055 isc_result_t result;
1056 rbtdb_version_t *version;
1058 version = isc_mem_get(mctx, sizeof(*version));
1059 if (version == NULL)
1061 version->serial = serial;
1062 result = isc_refcount_init(&version->references, references);
1063 if (result != ISC_R_SUCCESS) {
1064 isc_mem_put(mctx, version, sizeof(*version));
1067 version->writer = writer;
1068 version->commit_ok = ISC_FALSE;
1069 ISC_LIST_INIT(version->changed_list);
1070 ISC_LIST_INIT(version->resigned_list);
1071 ISC_LINK_INIT(version, link);
/*
 * Open a new version of the database and return it in '*versionp'.
 *
 * Requires that '*versionp' is NULL and that no future version exists.
 * Under the database write lock a version is allocated with serial
 * rbtdb->next_serial and one reference.  On success it is marked
 * commit_ok, inherits 'secure' and 'havensec3' from the current version,
 * and either copies the NSEC3 parameters or zeroes them.  next_serial is
 * then advanced and the version becomes rbtdb->future_version.
 *
 * Returns ISC_R_NOMEMORY if the version could not be allocated,
 * ISC_R_SUCCESS otherwise.
 */
1077 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1078 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1079 rbtdb_version_t *version;
1081 REQUIRE(VALID_RBTDB(rbtdb));
1082 REQUIRE(versionp != NULL && *versionp == NULL);
1083 REQUIRE(rbtdb->future_version == NULL);
1085 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1086 RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
1087 version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1089 if (version != NULL) {
1090 version->commit_ok = ISC_TRUE;
1091 version->secure = rbtdb->current_version->secure;
1092 version->havensec3 = rbtdb->current_version->havensec3;
1093 if (version->havensec3) {
1094 version->flags = rbtdb->current_version->flags;
1095 version->iterations =
1096 rbtdb->current_version->iterations;
1097 version->hash = rbtdb->current_version->hash;
1098 version->salt_length =
1099 rbtdb->current_version->salt_length;
/* Only salt_length bytes of the salt are copied. */
1100 memcpy(version->salt, rbtdb->current_version->salt,
1101 version->salt_length);
1104 version->iterations = 0;
1106 version->salt_length = 0;
1107 memset(version->salt, 0, sizeof(version->salt));
1109 rbtdb->next_serial++;
1110 rbtdb->future_version = version;
1112 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/* The result is reported after the lock has been dropped. */
1114 if (version == NULL)
1115 return (ISC_R_NOMEMORY);
1117 *versionp = version;
1119 return (ISC_R_SUCCESS);
/*
 * Take an additional reference on version 'source' and return the same
 * version in '*targetp'.  'db' must be a valid rbtdb.
 */
1123 attachversion(dns_db_t *db, dns_dbversion_t *source,
1124 dns_dbversion_t **targetp)
1126 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1127 rbtdb_version_t *rbtversion = source;
1130 REQUIRE(VALID_RBTDB(rbtdb));
1132 isc_refcount_increment(&rbtversion->references, &refs);
1135 *targetp = rbtversion;
/*
 * Record that 'node' was changed by writer version 'version'.
 *
 * The record is allocated before the database write lock is taken.
 * Under the lock, and only if the allocation succeeded, the node's
 * reference count is incremented and the record (with dirty set to
 * ISC_FALSE) is appended to version->changed_list.
 *
 * NOTE(review): commit_ok is cleared below; confirm whether that line
 * belongs to the allocation-failure branch.
 */
1138 static rbtdb_changed_t *
1139 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1140 dns_rbtnode_t *node)
1142 rbtdb_changed_t *changed;
1146 * Caller must be holding the node lock if its reference must be
1147 * protected by the lock.
1150 changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1152 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1154 REQUIRE(version->writer);
1156 if (changed != NULL) {
1157 dns_rbtnode_refincrement(node, &refs);
1159 changed->node = node;
1160 changed->dirty = ISC_FALSE;
1161 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1163 version->commit_ok = ISC_FALSE;
1165 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Release an additional-cache control array belonging to 'header'.
 *
 * The element count is read from the two bytes that immediately follow
 * the header (high byte first).  Every slot must already have NULL
 * 'entry' and 'cbarg' pointers; the array is then returned to 'mctx'.
 */
1171 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1176 unsigned char *raw; /* RDATASLAB */
1179 * The caller must be holding the corresponding node lock.
1185 raw = (unsigned char *)header + sizeof(*header);
1186 count = raw[0] * 256 + raw[1];
1189 * Sanity check: since an additional cache entry has a reference to
1190 * the original DB node (in the callback arg), there should be no
1191 * acache entries when the node can be freed.
1193 for (i = 0; i < count; i++)
1194 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1196 isc_mem_put(mctx, array, count * sizeof(acachectl_t));
/*
 * Free the noqname record '*noqname' and everything it owns: the name
 * if it is dynamically allocated, the 'neg' and 'negsig' slabs when
 * present (sized with dns_rdataslab_size(..., 0)), and the structure.
 */
1200 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1202 if (dns_name_dynamic(&(*noqname)->name))
1203 dns_name_free(&(*noqname)->name, mctx);
1204 if ((*noqname)->neg != NULL)
1205 isc_mem_put(mctx, (*noqname)->neg,
1206 dns_rdataslab_size((*noqname)->neg, 0));
1207 if ((*noqname)->negsig != NULL)
1208 isc_mem_put(mctx, (*noqname)->negsig,
1209 dns_rdataslab_size((*noqname)->negsig, 0));
1210 isc_mem_put(mctx, *noqname, sizeof(**noqname));
/*
 * Initialise the list link of header 'h'.  A trace line is written to
 * stderr for class-IN cache databases; presumably this is compiled in
 * only for debugging builds (the guard is not shown here).
 */
1215 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1217 ISC_LINK_INIT(h, link);
1221 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1222 fprintf(stderr, "initialized header: %p\n", h);
/*
 * Allocate a bare rdatasetheader_t from 'mctx' and initialise it with
 * init_rdataset().  A trace line is written to stderr for class-IN cache
 * databases; presumably this is a debugging aid (guard not shown here).
 */
1228 static inline rdatasetheader_t *
1229 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1231 rdatasetheader_t *h;
1233 h = isc_mem_get(mctx, sizeof(*h));
1238 if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1239 fprintf(stderr, "allocated header: %p\n", h);
1241 init_rdataset(rbtdb, h);
/*
 * Release header 'rdataset' and detach it from every auxiliary
 * structure that may still reference it.
 *
 *  - The statistics counter is decremented if the header exists and was
 *    counted (RDATASET_ATTR_STATCOUNT).
 *  - The header is removed from its bucket list (cache databases only)
 *    and from the bucket heap when heap_index is non-zero.
 *  - noqname/closest records and both additional-cache arrays are freed.
 *  - The size returned to 'mctx' is the bare header size for
 *    non-existent headers; the other computation uses
 *    dns_rdataslab_size().
 */
1246 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1251 if (EXISTS(rdataset) &&
1252 (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1253 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
/* Lists and heaps are indexed by the owning node's lock bucket. */
1256 idx = rdataset->node->locknum;
1257 if (ISC_LINK_LINKED(rdataset, link)) {
1258 INSIST(IS_CACHE(rbtdb));
1259 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1261 if (rdataset->heap_index != 0)
1262 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1263 rdataset->heap_index = 0;
1265 if (rdataset->noqname != NULL)
1266 free_noqname(mctx, &rdataset->noqname);
1267 if (rdataset->closest != NULL)
1268 free_noqname(mctx, &rdataset->closest);
1270 free_acachearray(mctx, rdataset, rdataset->additional_auth);
1271 free_acachearray(mctx, rdataset, rdataset->additional_glue);
1273 if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1274 size = sizeof(*rdataset);
1276 size = dns_rdataslab_size((unsigned char *)rdataset,
1278 isc_mem_put(mctx, rdataset, size);
/*
 * Mark every rdataset header at 'node' whose serial equals 'serial' with
 * RDATASET_ATTR_IGNORE.  Both the top-level headers (linked via 'next')
 * and the entries on each header's 'down' chain are examined, and
 * 'make_dirty' records whether any header was marked.
 */
1282 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1283 rdatasetheader_t *header, *dcurrent;
1284 isc_boolean_t make_dirty = ISC_FALSE;
1287 * Caller must hold the node lock.
1291 * We set the IGNORE attribute on rdatasets with serial number
1292 * 'serial'. When the reference count goes to zero, these rdatasets
1293 * will be cleaned up; until that time, they will be ignored.
1295 for (header = node->data; header != NULL; header = header->next) {
1296 if (header->serial == serial) {
1297 header->attributes |= RDATASET_ATTR_IGNORE;
1298 make_dirty = ISC_TRUE;
/* Older versions of the same rdataset hang off the 'down' chain. */
1300 for (dcurrent = header->down;
1302 dcurrent = dcurrent->down) {
1303 if (dcurrent->serial == serial) {
1304 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1305 make_dirty = ISC_TRUE;
/*
 * Free every header on the 'down' chain of 'top' with free_rdataset().
 * 'top' itself is not freed here.
 */
1314 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1316 rdatasetheader_t *d, *down_next;
1318 for (d = top->down; d != NULL; d = down_next) {
/* Save the successor first: 'd' is released on the next line. */
1319 down_next = d->down;
1320 free_rdataset(rbtdb, mctx, d);
/*
 * Clean the rdataset headers of a cache node.  For each top-level header
 * the 'down' chain is released via clean_stale_headers(); a top-level
 * header that is NONEXISTENT or STALE is then unlinked from the node's
 * list and freed.
 */
1326 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1327 rdatasetheader_t *current, *top_prev, *top_next;
1328 isc_mem_t *mctx = rbtdb->common.mctx;
1331 * Caller must be holding the node lock.
1335 for (current = node->data; current != NULL; current = top_next) {
/* Remember the successor; 'current' may be freed below. */
1336 top_next = current->next;
1337 clean_stale_headers(rbtdb, mctx, current);
1339 * If current is nonexistent or stale, we can clean it up.
1341 if ((current->attributes &
1342 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
/* Unlink from the predecessor, or from the node when it is the head. */
1343 if (top_prev != NULL)
1344 top_prev->next = current->next;
1346 node->data = current->next;
1347 free_rdataset(rbtdb, mctx, current);
/*
 * Clean the rdataset headers of a zone node, given the least serial number
 * that must be kept ('least_serial', required to be non-zero).
 *
 * For each top-level header this removes redundant entries from its 'down'
 * chain, deals with a top-level header that has the IGNORE attribute
 * (either unlinking it or pulling up its first 'down' entry), frees the
 * 'down' entries older than 'least_serial', and deletes a NONEXISTENT
 * top-level header.  'still_dirty' is set when a 'down' chain remains.
 */
1355 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1356 rbtdb_serial_t least_serial)
1358 rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1359 rdatasetheader_t *top_prev, *top_next;
1360 isc_mem_t *mctx = rbtdb->common.mctx;
1361 isc_boolean_t still_dirty = ISC_FALSE;
1364 * Caller must be holding the node lock.
1366 REQUIRE(least_serial != 0);
1369 for (current = node->data; current != NULL; current = top_next) {
1370 top_next = current->next;
1373 * First, we clean up any instances of multiple rdatasets
1374 * with the same serial number, or that have the IGNORE
1378 for (dcurrent = current->down;
1380 dcurrent = down_next) {
1381 down_next = dcurrent->down;
1382 INSIST(dcurrent->serial <= dparent->serial);
1383 if (dcurrent->serial == dparent->serial ||
/* Splice 'dcurrent' out of the down chain before freeing it. */
1385 if (down_next != NULL)
1386 down_next->next = dparent;
1387 dparent->down = down_next;
1388 free_rdataset(rbtdb, mctx, dcurrent);
1394 * We've now eliminated all IGNORE datasets with the possible
1395 * exception of current, which we now check.
1397 if (IGNORE(current)) {
1398 down_next = current->down;
1399 if (down_next == NULL) {
1400 if (top_prev != NULL)
1401 top_prev->next = current->next;
1403 node->data = current->next;
1404 free_rdataset(rbtdb, mctx, current);
1406 * current no longer exists, so we can
1407 * just continue with the loop.
1412 * Pull up current->down, making it the new
1415 if (top_prev != NULL)
1416 top_prev->next = down_next;
1418 node->data = down_next;
1419 down_next->next = top_next;
1420 free_rdataset(rbtdb, mctx, current);
1421 current = down_next;
1426 * We now try to find the first down node less than the
1430 for (dcurrent = current->down;
1432 dcurrent = down_next) {
1433 down_next = dcurrent->down;
1434 if (dcurrent->serial < least_serial)
1440 * If there is a such an rdataset, delete it and any older
1443 if (dcurrent != NULL) {
1445 down_next = dcurrent->down;
1446 INSIST(dcurrent->serial <= least_serial);
1447 free_rdataset(rbtdb, mctx, dcurrent);
1448 dcurrent = down_next;
1449 } while (dcurrent != NULL);
/* Everything below 'dparent' has been freed; terminate the chain. */
1450 dparent->down = NULL;
1454 * Note. The serial number of 'current' might be less than
1455 * least_serial too, but we cannot delete it because it is
1456 * the most recent version, unless it is a NONEXISTENT
1459 if (current->down != NULL) {
1460 still_dirty = ISC_TRUE;
1464 * If this is a NONEXISTENT rdataset, we can delete it.
1466 if (NONEXISTENT(current)) {
1467 if (top_prev != NULL)
1468 top_prev->next = current->next;
1470 node->data = current->next;
1471 free_rdataset(rbtdb, mctx, current);
/*
 * Delete 'node' from the red-black tree it belongs to, selected by
 * node->nsec: the main tree, the auxiliary NSEC tree, or the NSEC3 tree.
 * For DNS_RBT_NSEC_HAS_NSEC the matching node in the auxiliary NSEC tree
 * is looked up by full name and deleted before the node is deleted from
 * the main tree.  Failures are logged; nothing visible here returns them.
 *
 * The node must not be on a dead-node list (enforced by the INSIST).
 */
1481 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
1483 dns_rbtnode_t *nsecnode;
1484 dns_fixedname_t fname;
1486 isc_result_t result = ISC_R_UNEXPECTED;
1488 INSIST(!ISC_LINK_LINKED(node, deadlink));
1490 switch (node->nsec) {
1491 case DNS_RBT_NSEC_NORMAL:
1492 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1494 case DNS_RBT_NSEC_HAS_NSEC:
1495 dns_fixedname_init(&fname);
1496 name = dns_fixedname_name(&fname);
1497 dns_rbt_fullnamefromnode(node, name);
1499 * Delete the corresponding node from the auxiliary NSEC
1500 * tree before deleting from the main tree.
1503 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1504 NULL, DNS_RBTFIND_EMPTYDATA,
1506 if (result != ISC_R_SUCCESS) {
1507 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1508 DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1510 "dns_rbt_findnode(nsec): %s",
1511 isc_result_totext(result));
1513 result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1515 if (result != ISC_R_SUCCESS) {
1516 isc_log_write(dns_lctx,
1517 DNS_LOGCATEGORY_DATABASE,
1518 DNS_LOGMODULE_CACHE,
1520 "delete_nsecnode(): "
1521 "dns_rbt_deletenode(nsecnode): %s",
1522 isc_result_totext(result));
/* The main-tree deletion follows the handling of the auxiliary tree. */
1525 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1527 case DNS_RBT_NSEC_NSEC:
1528 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1530 case DNS_RBT_NSEC_NSEC3:
1531 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
/* Report a failed deletion from whichever tree was selected above. */
1534 if (result != ISC_R_SUCCESS) {
1535 isc_log_write(dns_lctx,
1536 DNS_LOGCATEGORY_DATABASE,
1537 DNS_LOGMODULE_CACHE,
1539 "delete_nsecnode(): "
1540 "dns_rbt_deletenode: %s",
1541 isc_result_totext(result));
1546 * Clean up dead nodes. These are nodes which have no references, and
1547 * have no data. They are dead but we could not or chose not to delete
1548 * them when we deleted all the data at that node because we did not want
1549 * to wait for the tree write lock.
1551 * The caller must hold a tree write lock and bucketnum'th node (write) lock.
/*
 * Take nodes from the head of the dead-node list of lock bucket
 * 'bucketnum', unlink each from that list and delete it with
 * delete_node().  The loop runs while the list is non-empty and 'count'
 * (initially 10) is positive.
 * NOTE(review): the statement that decreases 'count' is not visible here.
 */
1554 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1555 dns_rbtnode_t *node;
1556 int count = 10; /* XXXJT: should be adjustable */
1558 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1559 while (node != NULL && count > 0) {
1560 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1563 * Since we're holding a tree write lock, it should be
1564 * impossible for this node to be referenced by others.
1566 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1567 node->data == NULL);
1569 delete_node(rbtdb, node);
/* Continue with whatever is now at the head of the list. */
1571 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1577 * Caller must be holding the node lock if its reference must be protected
/*
 * Add one reference to 'node'.  When this is the node's first reference
 * (the count becomes 1), the reference counter of the node's lock bucket
 * is incremented as well.
 */
1581 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1582 unsigned int lockrefs, noderefs;
1583 isc_refcount_t *lockref;
1585 dns_rbtnode_refincrement0(node, &noderefs);
1586 if (noderefs == 1) { /* this is the first reference to the node */
1587 lockref = &rbtdb->node_locks[node->locknum].references;
1588 isc_refcount_increment0(lockref, &lockrefs);
1589 INSIST(lockrefs != 0);
1591 INSIST(noderefs != 0);
1595 * This function is assumed to be called when a node is newly referenced
1596 * and can be in the deadnode list. In that case the node must be retrieved
1597 * from the list because it is going to be used. In addition, if the caller
1598 * happens to hold a write lock on the tree, it's a good chance to purge dead
1600 * Note: while a new reference is gained in multiple places, there are only very
1601 * few cases where the node can be in the deadnode list (only empty nodes can
1602 * have been added to the list).
/*
 * Gain a reference to 'node' and, if necessary, take it off the dead-node
 * list of its lock bucket.
 *
 * 'treelocktype' is the kind of tree lock the caller holds.  When it is a
 * write lock, cleanup_dead_nodes() is also run for the node's bucket.
 */
1605 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1606 isc_rwlocktype_t treelocktype)
1608 isc_boolean_t need_relock = ISC_FALSE;
1610 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1611 new_reference(rbtdb, node);
/*
 * First pass under the read lock: decide whether the write lock is
 * needed, either to unlink this node from the dead-node list or to purge
 * other dead nodes while the caller holds the tree write lock.
 */
1613 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1614 isc_rwlocktype_read);
1615 if (ISC_LINK_LINKED(node, deadlink))
1616 need_relock = ISC_TRUE;
1617 else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1618 treelocktype == isc_rwlocktype_write)
1619 need_relock = ISC_TRUE;
1620 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1621 isc_rwlocktype_read);
/*
 * Second pass under the write lock; the deadlink state is checked again
 * because the read lock was released in between.
 */
1623 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1624 isc_rwlocktype_write);
1625 if (ISC_LINK_LINKED(node, deadlink))
1626 ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1628 if (treelocktype == isc_rwlocktype_write)
1629 cleanup_dead_nodes(rbtdb, node->locknum);
1630 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1631 isc_rwlocktype_write);
1634 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1638 * Caller must be holding the node lock; either the "strong", read or write
1639 * lock. Note that the lock must be held even when node references are
1640 * atomically modified; in that case the decrement operation itself does not
1641 * have to be protected, but we must avoid a race condition where multiple
1642 * threads are decreasing the reference to zero simultaneously and at least
1643 * one of them is going to free the node.
1644 * This function returns ISC_TRUE if and only if the node reference decreases
/*
 * Drop one reference to 'node' and, when the count reaches zero, clean the
 * node's data and try to remove the node from the tree.
 *
 *   least_serial  least serial to keep when cleaning a zone node; 0 means
 *                 the caller does not know it, in which case it is read
 *                 from rbtdb->least_serial under the database read lock.
 *   nlock         type of node lock held by the caller; a read lock is
 *                 exchanged for a write lock for the slow path and
 *                 downgraded again before returning.
 *   tlock         type of tree lock held by the caller; when it is not a
 *                 write lock, a non-blocking upgrade or acquisition is
 *                 attempted.
 *   pruning       passed as ISC_TRUE by prune_tree(); suppresses sending
 *                 another pruning event.
 *
 * The fast path returns whether the node's count reached zero; the final
 * return yields 'no_reference'.
 */
1647 static isc_boolean_t
1648 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1649 rbtdb_serial_t least_serial,
1650 isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1651 isc_boolean_t pruning)
1653 isc_result_t result;
1654 isc_boolean_t write_locked;
1655 rbtdb_nodelock_t *nodelock;
1656 unsigned int refs, nrefs;
1657 int bucket = node->locknum;
1658 isc_boolean_t no_reference;
1660 nodelock = &rbtdb->node_locks[bucket];
1662 /* Handle easy and typical case first. */
1663 if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1664 dns_rbtnode_refdecrement(node, &nrefs);
1665 INSIST((int)nrefs >= 0);
1667 isc_refcount_decrement(&nodelock->references, &refs);
1668 INSIST((int)refs >= 0);
1670 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1673 /* Upgrade the lock? */
1674 if (nlock == isc_rwlocktype_read) {
1675 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1676 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1678 dns_rbtnode_refdecrement(node, &nrefs);
1679 INSIST((int)nrefs >= 0);
1681 /* Restore the lock? */
1682 if (nlock == isc_rwlocktype_read)
1683 NODE_WEAKDOWNGRADE(&nodelock->lock);
/*
 * A dirty node with no remaining references is cleaned now, with the
 * cache or the zone cleaner depending on the database type.
 */
1687 if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1688 if (IS_CACHE(rbtdb))
1689 clean_cache_node(rbtdb, node);
1691 if (least_serial == 0) {
1693 * Caller doesn't know the least serial.
1696 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1697 least_serial = rbtdb->least_serial;
1698 RBTDB_UNLOCK(&rbtdb->lock,
1699 isc_rwlocktype_read);
1701 clean_zone_node(rbtdb, node, least_serial);
1705 isc_refcount_decrement(&nodelock->references, &refs);
1706 INSIST((int)refs >= 0);
1709 * XXXDCL should this only be done for cache zones?
1711 if (node->data != NULL || node->down != NULL) {
1712 /* Restore the lock? */
1713 if (nlock == isc_rwlocktype_read)
1714 NODE_WEAKDOWNGRADE(&nodelock->lock);
1719 * Attempt to switch to a write lock on the tree. If this fails,
1720 * we will add this node to a linked list of nodes in this locking
1721 * bucket which we will free later.
1723 if (tlock != isc_rwlocktype_write) {
1725 * Locking hierarchy notwithstanding, we don't need to free
1726 * the node lock before acquiring the tree write lock because
1727 * we only do a trylock.
1729 if (tlock == isc_rwlocktype_read)
1730 result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1732 result = isc_rwlock_trylock(&rbtdb->tree_lock,
1733 isc_rwlocktype_write);
1734 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1735 result == ISC_R_LOCKBUSY);
1737 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1739 write_locked = ISC_TRUE;
1741 no_reference = ISC_TRUE;
1742 if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1744 * We can now delete the node if the reference counter is
1745 * zero. This should be typically the case, but a different
1746 * thread may still gain a (new) reference just before the
1747 * current thread locks the tree (e.g., in findnode()).
1751 * If this node is the only one in the level it's in, deleting
1752 * this node may recursively make its parent the only node in
1753 * the parent level; if so, and if no one is currently using
1754 * the parent node, this is almost the only opportunity to
1755 * clean it up. But the recursive cleanup is not that trivial
1756 * since the child and parent may be in different lock buckets,
1757 * which would cause a lock order reversal problem. To avoid
1758 * the trouble, we'll dispatch a separate event for batch
1759 * cleaning. We need to check whether we're deleting the node
1760 * as a result of pruning to avoid infinite dispatching.
1761 * Note: pruning happens only when a task has been set for the
1762 * rbtdb. If the user of the rbtdb chooses not to set a task,
1763 * it's their responsibility to purge stale leaves (e.g. by
1764 * periodic walk-through).
1766 if (!pruning && node->parent != NULL &&
1767 node->parent->down == node && node->left == NULL &&
1768 node->right == NULL && rbtdb->task != NULL) {
1772 ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1775 sizeof(isc_event_t));
/*
 * The queued event keeps a reference to the node and to the database, so
 * the node is reported to the caller as still referenced.
 */
1777 new_reference(rbtdb, node);
1779 attach((dns_db_t *)rbtdb, &db);
1781 isc_task_send(rbtdb->task, &ev);
1782 no_reference = ISC_FALSE;
1785 * XXX: this is a weird situation. We could
1786 * ignore this error case, but then the stale
1787 * node will unlikely be purged except via a
1788 * rare condition such as manual cleanup. So
1789 * we queue it in the deadnodes list, hoping
1790 * the memory shortage is temporary and the node
1791 * will be deleted later.
1793 isc_log_write(dns_lctx,
1794 DNS_LOGCATEGORY_DATABASE,
1795 DNS_LOGMODULE_CACHE,
1797 "decrement_reference: failed to "
1798 "allocate pruning event");
1799 INSIST(!ISC_LINK_LINKED(node, deadlink));
1800 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1804 if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1805 char printname[DNS_NAME_FORMATSIZE];
1807 isc_log_write(dns_lctx,
1808 DNS_LOGCATEGORY_DATABASE,
1809 DNS_LOGMODULE_CACHE,
1811 "decrement_reference: "
1812 "delete from rbt: %p %s",
1814 dns_rbt_formatnodename(node,
1816 sizeof(printname)));
1819 delete_node(rbtdb, node);
/*
 * No tree write lock (or a new reference appeared): an unreferenced node
 * is queued on this bucket's dead-node list for later deletion.
 */
1821 } else if (dns_rbtnode_refcurrent(node) == 0) {
1822 INSIST(!ISC_LINK_LINKED(node, deadlink));
1823 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1825 no_reference = ISC_FALSE;
1827 /* Restore the lock? */
1828 if (nlock == isc_rwlocktype_read)
1829 NODE_WEAKDOWNGRADE(&nodelock->lock);
1832 * Relock a read lock, or unlock the write lock if no lock was held.
1834 if (tlock == isc_rwlocktype_none)
1836 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1838 if (tlock == isc_rwlocktype_read)
1840 isc_rwlock_downgrade(&rbtdb->tree_lock);
1842 return (no_reference);
1846 * Prune the tree by recursively cleaning up single leaves. In the worst
1847 * case, the number of iterations is the number of tree levels, which is at
1848 * most the maximum number of domain name labels, i.e., 127. In practice, this
1849 * should be much smaller (only a few times), and even the worst case would be
1850 * acceptable for a single event.
/*
 * Event handler for tree pruning.  'event->ev_sender' is the rbtdb and
 * 'event->ev_arg' the node to start from; the event is freed immediately.
 *
 * With the tree write lock and the node's bucket write lock held, the
 * reference on the node is dropped through decrement_reference() with
 * 'pruning' set.  If the parent is then left without a 'down' subtree, a
 * reference to the parent is taken (switching bucket locks when the
 * parent lives in a different bucket) and the parent is removed from the
 * dead-node list.  The loop ends when 'node' becomes NULL.
 * NOTE(review): the statements that advance 'node' are not visible here.
 *
 * Finally the database is detached.
 */
1853 prune_tree(isc_task_t *task, isc_event_t *event) {
1854 dns_rbtdb_t *rbtdb = event->ev_sender;
1855 dns_rbtnode_t *node = event->ev_arg;
1856 dns_rbtnode_t *parent;
1857 unsigned int locknum;
1861 isc_event_free(&event);
1863 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1864 locknum = node->locknum;
1865 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/* Read the parent before the call below, which may delete 'node'. */
1867 parent = node->parent;
1868 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1869 isc_rwlocktype_write, ISC_TRUE);
1871 if (parent != NULL && parent->down == NULL) {
1873 * node was the only down child of the parent and has
1874 * just been removed. We'll then need to examine the
1875 * parent. Keep the lock if possible; otherwise,
1876 * release the old lock and acquire one for the parent.
1878 if (parent->locknum != locknum) {
1879 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1880 isc_rwlocktype_write);
1881 locknum = parent->locknum;
1882 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1883 isc_rwlocktype_write);
1887 * We need to gain a reference to the node before
1888 * decrementing it in the next iteration. In addition,
1889 * if the node is in the dead-nodes list, extract it
1890 * from the list beforehand as we do in
1891 * reactivate_node().
1893 new_reference(rbtdb, parent);
1894 if (ISC_LINK_LINKED(parent, deadlink)) {
1895 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1902 } while (node != NULL);
1903 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1904 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1906 detach((dns_db_t **)&rbtdb);
/*
 * Make 'version' the least open version: rbtdb->least_serial takes its
 * serial, and its changed list is handed over to '*cleanup_list', leaving
 * the version's own changed list empty.
 */
1910 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1911 rbtdb_changedlist_t *cleanup_list)
1914 * Caller must be holding the database lock.
1917 rbtdb->least_serial = version->serial;
1918 *cleanup_list = version->changed_list;
1919 ISC_LIST_INIT(version->changed_list);
/*
 * Move every changed record of 'version' that is not dirty from the
 * version's changed list to '*cleanup_list'.  Dirty records stay on the
 * version's list.
 */
1923 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1924 rbtdb_changed_t *changed, *next_changed;
1927 * If the changed record is dirty, then
1928 * an update created multiple versions of
1929 * a given rdataset. We keep this list
1930 * until we're the least open version, at
1931 * which point it's safe to get rid of any
1934 * If the changed record isn't dirty, then
1935 * we don't need it anymore since we're
1936 * committing and not rolling back.
1938 * The caller must be holding the database lock.
1940 for (changed = HEAD(version->changed_list);
1942 changed = next_changed) {
/* Fetch the successor before 'changed' is possibly unlinked. */
1943 next_changed = NEXT(changed, link);
1944 if (!changed->dirty) {
1945 UNLINK(version->changed_list,
1947 APPEND(*cleanup_list,
/*
 * Recompute the security status stored in 'version' ('secure' and
 * 'havensec3') for the zone whose apex node is 'origin'.
 *
 * The DNSKEY rdataset at the origin is scanned for a zone key, the NSEC
 * rdataset is looked up together with its signatures, and
 * setnsec3parameters() is called.  The version is marked dns_db_secure
 * when it has NSEC3 parameters or 'hasnsec' is set, and dns_db_insecure
 * otherwise.
 * NOTE(review): the conditions guarding the early "insecure" assignment
 * and the setting of 'hasnsec' are not visible here.
 */
1954 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
1962 dns_rdataset_t keyset;
1963 dns_rdataset_t nsecset, signsecset;
1964 isc_boolean_t haszonekey = ISC_FALSE;
1965 isc_boolean_t hasnsec = ISC_FALSE;
1966 isc_result_t result;
1968 dns_rdataset_init(&keyset);
1969 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1970 0, 0, &keyset, NULL);
1971 if (result == ISC_R_SUCCESS) {
1972 dns_rdata_t keyrdata = DNS_RDATA_INIT;
1973 result = dns_rdataset_first(&keyset);
1974 while (result == ISC_R_SUCCESS) {
1975 dns_rdataset_current(&keyset, &keyrdata);
1976 if (dns_zonekey_iszonekey(&keyrdata)) {
1977 haszonekey = ISC_TRUE;
1980 result = dns_rdataset_next(&keyset);
1982 dns_rdataset_disassociate(&keyset);
1985 version->secure = dns_db_insecure;
1986 version->havensec3 = ISC_FALSE;
/* Look up the NSEC rdataset at the origin and its signatures. */
1990 dns_rdataset_init(&nsecset);
1991 dns_rdataset_init(&signsecset);
1992 result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1993 0, 0, &nsecset, &signsecset);
1994 if (result == ISC_R_SUCCESS) {
1995 if (dns_rdataset_isassociated(&signsecset)) {
1997 dns_rdataset_disassociate(&signsecset);
1999 dns_rdataset_disassociate(&nsecset);
2002 setnsec3parameters(db, version);
2005 * Do we have a valid NSEC/NSEC3 chain?
2007 if (version->havensec3 || hasnsec)
2008 version->secure = dns_db_secure;
2010 version->secure = dns_db_insecure;
2015 * Walk the origin node looking for NSEC3PARAM records.
2016 * Cache the nsec3 parameters.
/*
 * Scan the rdataset headers at the database origin node for an NSEC3PARAM
 * rdataset visible in 'version' and copy the parameters of a usable
 * record (hash, flags, iterations, salt) into 'version', setting
 * version->havensec3.
 *
 * Records with an unsupported hash (other than DNS_NSEC3_UNKNOWNALG) or
 * with non-zero flags are tested for explicitly.  The tree read lock and
 * the origin node's read lock are held for the duration of the scan.
 */
2020 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
2021 dns_rbtnode_t *node;
2022 dns_rdata_nsec3param_t nsec3param;
2023 dns_rdata_t rdata = DNS_RDATA_INIT;
2024 isc_region_t region;
2025 isc_result_t result;
2026 rdatasetheader_t *header, *header_next;
2027 unsigned char *raw; /* RDATASLAB */
2028 unsigned int count, length;
2029 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2031 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2032 version->havensec3 = ISC_FALSE;
2033 node = rbtdb->origin_node;
2034 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2035 isc_rwlocktype_read);
2036 for (header = node->data;
2038 header = header_next) {
2039 header_next = header->next;
/*
 * Walk down the version chain looking for a header whose serial is not
 * newer than this version's.
 */
2041 if (header->serial <= version->serial &&
2043 if (NONEXISTENT(header))
2047 header = header->down;
2048 } while (header != NULL);
2050 if (header != NULL &&
2051 (header->type == dns_rdatatype_nsec3param)) {
2053 * Find A NSEC3PARAM with a supported algorithm.
/*
 * The raw slab follows the header; its first two bytes hold the record
 * count in big-endian order.
 */
2055 raw = (unsigned char *)header + sizeof(*header);
2056 count = raw[0] * 256 + raw[1]; /* count */
2057 #if DNS_RDATASET_FIXED
2058 raw += count * 4 + 2;
2062 while (count-- > 0U) {
/* Each record starts with a two-byte big-endian length. */
2063 length = raw[0] * 256 + raw[1];
2064 #if DNS_RDATASET_FIXED
2070 region.length = length;
2072 dns_rdata_fromregion(&rdata,
2073 rbtdb->common.rdclass,
2074 dns_rdatatype_nsec3param,
2076 result = dns_rdata_tostruct(&rdata,
2079 INSIST(result == ISC_R_SUCCESS);
2080 dns_rdata_reset(&rdata);
2082 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2083 !dns_nsec3_supportedhash(nsec3param.hash))
2086 if (nsec3param.flags != 0)
2089 memcpy(version->salt, nsec3param.salt,
2090 nsec3param.salt_length);
2091 version->hash = nsec3param.hash;
2092 version->salt_length = nsec3param.salt_length;
2093 version->iterations = nsec3param.iterations;
2094 version->flags = nsec3param.flags;
2095 version->havensec3 = ISC_TRUE;
2097 * Look for a better algorithm than the
2098 * unknown test algorithm.
2100 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2106 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2107 isc_rwlocktype_read);
2108 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2113 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2114 dns_rbtdb_t *rbtdb = event->ev_arg;
2115 isc_boolean_t again = ISC_FALSE;
2116 unsigned int locknum;
2119 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2120 for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2121 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2122 isc_rwlocktype_write);
2123 cleanup_dead_nodes(rbtdb, locknum);
2124 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2126 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2127 isc_rwlocktype_write);
2129 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2131 isc_task_send(task, &event);
2133 isc_event_free(&event);
2134 isc_refcount_decrement(&rbtdb->references, &refs);
2136 maybe_free_rbtdb(rbtdb);
/*
 * Close the database version '*versionp'.
 *
 * A reference to the version is dropped first.  If references remain,
 * only a consistency check (the version must not be the writer) is made
 * under the database read lock.  Otherwise, under the database write
 * lock:
 *   - a writer version is either committed, becoming the current version,
 *     or rolled back;
 *   - a reader version other than the current one is scheduled for
 *     cleanup, and its pending changed records are either taken for
 *     cleanup (when it was the least open version) or passed to the
 *     least greater version.
 *
 * After the database lock is released, the zone's secure status is
 * refreshed for a committed writer on a non-cache database, re-signed
 * headers are processed, and every node on the cleanup list has its
 * reference dropped through decrement_reference().
 */
2141 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2142 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2143 rbtdb_version_t *version, *cleanup_version, *least_greater;
2144 isc_boolean_t rollback = ISC_FALSE;
2145 rbtdb_changedlist_t cleanup_list;
2146 rdatasetheaderlist_t resigned_list;
2147 rbtdb_changed_t *changed, *next_changed;
2148 rbtdb_serial_t serial, least_serial;
2149 dns_rbtnode_t *rbtnode;
2151 rdatasetheader_t *header;
2152 isc_boolean_t writer;
2154 REQUIRE(VALID_RBTDB(rbtdb));
2155 version = (rbtdb_version_t *)*versionp;
2157 cleanup_version = NULL;
2158 ISC_LIST_INIT(cleanup_list);
2159 ISC_LIST_INIT(resigned_list);
2161 isc_refcount_decrement(&version->references, &refs);
2162 if (refs > 0) { /* typical and easy case first */
2164 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2165 INSIST(!version->writer);
2166 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2171 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
/* Snapshot these now; 'version' may change or be freed below. */
2172 serial = version->serial;
2173 writer = version->writer;
2174 if (version->writer) {
2177 rbtdb_version_t *cur_version;
2179 INSIST(version->commit_ok);
2180 INSIST(version == rbtdb->future_version);
2182 * The current version is going to be replaced.
2183 * Release the (likely last) reference to it from the
2184 * DB itself and unlink it from the open list.
2186 cur_version = rbtdb->current_version;
2187 isc_refcount_decrement(&cur_version->references,
2190 if (cur_version->serial == rbtdb->least_serial)
2191 INSIST(EMPTY(cur_version->changed_list));
2192 UNLINK(rbtdb->open_versions,
2195 if (EMPTY(rbtdb->open_versions)) {
2197 * We're going to become the least open
2200 make_least_version(rbtdb, version,
2204 * Some other open version is the
2205 * least version. We can't cleanup
2206 * records that were changed in this
2207 * version because the older versions
2208 * may still be in use by an open
2211 * We can, however, discard the
2212 * changed records for things that
2213 * we've added that didn't exist in
2216 cleanup_nondirty(version, &cleanup_list);
2219 * If the (soon to be former) current version
2220 * isn't being used by anyone, we can clean
2224 cleanup_version = cur_version;
2225 APPENDLIST(version->changed_list,
2226 cleanup_version->changed_list,
2230 * Become the current version.
2232 version->writer = ISC_FALSE;
2233 rbtdb->current_version = version;
2234 rbtdb->current_serial = version->serial;
2235 rbtdb->future_version = NULL;
2238 * Keep the current version in the open list, and
2239 * gain a reference for the DB itself (see the DB
2240 * creation function below). This must be the only
2241 * case where we need to increment the counter from
2242 * zero and need to use isc_refcount_increment0().
2244 isc_refcount_increment0(&version->references,
2246 INSIST(cur_ref == 1);
2247 PREPEND(rbtdb->open_versions,
2248 rbtdb->current_version, link);
2249 resigned_list = version->resigned_list;
2250 ISC_LIST_INIT(version->resigned_list);
2253 * We're rolling back this transaction.
2255 cleanup_list = version->changed_list;
2256 ISC_LIST_INIT(version->changed_list);
2257 resigned_list = version->resigned_list;
2258 ISC_LIST_INIT(version->resigned_list);
2259 rollback = ISC_TRUE;
2260 cleanup_version = version;
2261 rbtdb->future_version = NULL;
2264 if (version != rbtdb->current_version) {
2266 * There are no external or internal references
2267 * to this version and it can be cleaned up.
2269 cleanup_version = version;
2272 * Find the version with the least serial
2273 * number greater than ours.
2275 least_greater = PREV(version, link);
2276 if (least_greater == NULL)
2277 least_greater = rbtdb->current_version;
2279 INSIST(version->serial < least_greater->serial);
2281 * Is this the least open version?
2283 if (version->serial == rbtdb->least_serial) {
2285 * Yes. Install the new least open
2288 make_least_version(rbtdb,
2293 * Add any unexecuted cleanups to
2294 * those of the least greater version.
2296 APPENDLIST(least_greater->changed_list,
2297 version->changed_list,
2300 } else if (version->serial == rbtdb->least_serial)
2301 INSIST(EMPTY(version->changed_list));
2302 UNLINK(rbtdb->open_versions, version, link);
/* Capture the least serial for the node cleanup done after unlocking. */
2304 least_serial = rbtdb->least_serial;
2305 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2308 * Update the zone's secure status.
2310 if (writer && commit && !IS_CACHE(rbtdb))
2311 iszonesecure(db, version, rbtdb->origin_node);
2313 if (cleanup_version != NULL) {
2314 INSIST(EMPTY(cleanup_version->changed_list));
2315 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2316 sizeof(*cleanup_version));
2320 * Commit/rollback re-signed headers.
2322 for (header = HEAD(resigned_list);
2324 header = HEAD(resigned_list)) {
2327 ISC_LIST_UNLINK(resigned_list, header, link);
2329 lock = &rbtdb->node_locks[header->node->locknum].lock;
2330 NODE_LOCK(lock, isc_rwlocktype_write);
2332 resign_insert(rbtdb, header->node->locknum, header);
2333 decrement_reference(rbtdb, header->node, least_serial,
2334 isc_rwlocktype_write, isc_rwlocktype_none,
2336 NODE_UNLOCK(lock, isc_rwlocktype_write);
2339 if (!EMPTY(cleanup_list)) {
2340 isc_event_t *event = NULL;
2341 isc_rwlocktype_t tlock = isc_rwlocktype_none;
/*
 * With a task available, dead-node cleanup is deferred to an event;
 * otherwise (or if the allocation fails) the tree write lock is taken so
 * nodes can be deleted directly.
 */
2343 if (rbtdb->task != NULL)
2344 event = isc_event_allocate(rbtdb->common.mctx, NULL,
2345 DNS_EVENT_RBTDEADNODES,
2346 cleanup_dead_nodes_callback,
2347 rbtdb, sizeof(isc_event_t));
2348 if (event == NULL) {
2350 * We acquire a tree write lock here in order to make
2351 * sure that stale nodes will be removed in
2352 * decrement_reference(). If we didn't have the lock,
2353 * those nodes could miss the chance to be removed
2354 * until the server stops. The write lock is
2355 * expensive, but this event should be rare enough
2356 * to justify the cost.
2358 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2359 tlock = isc_rwlocktype_write;
2362 for (changed = HEAD(cleanup_list);
2364 changed = next_changed) {
2367 next_changed = NEXT(changed, link);
2368 rbtnode = changed->node;
2369 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2371 NODE_LOCK(lock, isc_rwlocktype_write);
2373 * This is a good opportunity to purge any dead nodes,
2377 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2380 rollback_node(rbtnode, serial);
2381 decrement_reference(rbtdb, rbtnode, least_serial,
2382 isc_rwlocktype_write, tlock,
2385 NODE_UNLOCK(lock, isc_rwlocktype_write);
2387 isc_mem_put(rbtdb->common.mctx, changed,
2390 if (event != NULL) {
/* The queued event holds its own reference to the database. */
2391 isc_refcount_increment(&rbtdb->references, NULL);
2392 isc_task_send(rbtdb->task, &event);
2394 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2402 * Add the necessary magic for the wildcard name 'name'
2403 * to be found in 'rbtdb'.
2405 * In order for wildcard matching to work correctly in
2406 * zone_find(), we must ensure that a node for the wildcarding
2407 * level exists in the database, and has its 'find_callback'
2408 * and 'wild' bits set.
2410 * E.g. if the wildcard name is "*.sub.example." then we
2411 * must ensure that "sub.example." exists and is marked as
/*
 * Make sure the node one label above 'name' exists in the main tree and
 * has its 'find_callback' bit set.  The node is added when missing, and a
 * newly added node is marked DNS_RBT_NSEC_NORMAL.
 *
 * ISC_R_EXISTS from dns_rbt_addnode() is not treated as an error.
 * NOTE(review): the setting of the 'wild' bit is not visible here.
 */
2415 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2416 isc_result_t result;
2417 dns_name_t foundname;
2418 dns_offsets_t offsets;
2420 dns_rbtnode_t *node = NULL;
2422 dns_name_init(&foundname, offsets);
2423 n = dns_name_countlabels(name);
/* Take the label sequence of 'name' starting at its second label. */
2426 dns_name_getlabelsequence(name, 1, n, &foundname);
2427 result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2428 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2430 if (result == ISC_R_SUCCESS)
2431 node->nsec = DNS_RBT_NSEC_NORMAL;
2432 node->find_callback = 1;
2434 return (ISC_R_SUCCESS);
/*
 * Examine suffixes of 'name' (the trailing 'i' labels of its 'n' labels);
 * for each suffix that is a wildcard name, apply add_wildcard_magic() and
 * make sure a node for the suffix exists in the main tree.  Newly added
 * nodes are marked DNS_RBT_NSEC_NORMAL.
 * NOTE(review): the loop bounds, which involve the origin's label count
 * 'l', are not visible here.
 */
2438 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2439 isc_result_t result;
2440 dns_name_t foundname;
2441 dns_offsets_t offsets;
2442 unsigned int n, l, i;
2444 dns_name_init(&foundname, offsets);
2445 n = dns_name_countlabels(name);
2446 l = dns_name_countlabels(&rbtdb->common.origin);
2449 dns_rbtnode_t *node = NULL; /* dummy */
2450 dns_name_getlabelsequence(name, n - i, i, &foundname);
2451 if (dns_name_iswildcard(&foundname)) {
2452 result = add_wildcard_magic(rbtdb, &foundname);
2453 if (result != ISC_R_SUCCESS)
2455 result = dns_rbt_addnode(rbtdb->tree, &foundname,
2457 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2459 if (result == ISC_R_SUCCESS)
2460 node->nsec = DNS_RBT_NSEC_NORMAL;
2464 return (ISC_R_SUCCESS);
/*
 * Find the node for 'name' in the main tree and return it, referenced,
 * through '*nodep'.
 *
 * The lookup runs under the tree read lock.  When it does not succeed,
 * the read lock is dropped, the tree write lock is taken, and the node is
 * added; a newly added node gets its lock bucket number assigned, and
 * wildcard bookkeeping is done for the name.  A partial match is reported
 * as ISC_R_NOTFOUND.
 * NOTE(review): the test of 'create' that selects between failing and
 * adding the node is not visible here.
 *
 * The node is referenced through reactivate_node() before the tree lock
 * is released.
 */
2468 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2469 dns_dbnode_t **nodep)
2471 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2472 dns_rbtnode_t *node = NULL;
2473 dns_name_t nodename;
2474 isc_result_t result;
2475 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2477 REQUIRE(VALID_RBTDB(rbtdb));
2479 dns_name_init(&nodename, NULL);
2480 RWLOCK(&rbtdb->tree_lock, locktype);
2481 result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2482 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2483 if (result != ISC_R_SUCCESS) {
2484 RWUNLOCK(&rbtdb->tree_lock, locktype);
2486 if (result == DNS_R_PARTIALMATCH)
2487 result = ISC_R_NOTFOUND;
2491 * It would be nice to try to upgrade the lock instead of
2492 * unlocking then relocking.
2494 locktype = isc_rwlocktype_write;
2495 RWLOCK(&rbtdb->tree_lock, locktype);
2497 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2498 if (result == ISC_R_SUCCESS) {
2499 dns_rbt_namefromnode(node, &nodename);
/* The lock bucket is derived from a hash modulo the bucket count. */
2500 #ifdef DNS_RBT_USEHASH
2501 node->locknum = node->hashval % rbtdb->node_lock_count;
2503 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2504 rbtdb->node_lock_count;
/* The result of add_empty_wildcards() is not checked here. */
2506 add_empty_wildcards(rbtdb, name);
2508 if (dns_name_iswildcard(name)) {
2509 result = add_wildcard_magic(rbtdb, name);
2510 if (result != ISC_R_SUCCESS) {
2511 RWUNLOCK(&rbtdb->tree_lock, locktype);
2515 } else if (result != ISC_R_EXISTS) {
2516 RWUNLOCK(&rbtdb->tree_lock, locktype);
2520 reactivate_node(rbtdb, node, locktype);
2521 RWUNLOCK(&rbtdb->tree_lock, locktype);
2523 *nodep = (dns_dbnode_t *)node;
2525 return (ISC_R_SUCCESS);
/*
 * findnsec3node: look up 'name' in the NSEC3 tree (rbtdb->nsec3) and
 * return the node through '*nodep'.
 *
 * Same overall shape as the main-tree lookup: a read-locked
 * dns_rbt_findnode() with DNS_RBTFIND_EMPTYDATA; on anything other than
 * ISC_R_SUCCESS the read lock is dropped, DNS_R_PARTIALMATCH becomes
 * ISC_R_NOTFOUND, and the node is added under the tree write lock (the
 * 'create' argument presumably gates this path -- confirm against the
 * full source).
 *
 * A newly added node gets its 'locknum' assigned and its 'nsec' field
 * set to DNS_RBT_NSEC_NSEC3.  Any dns_rbt_addnode() result other than
 * ISC_R_SUCCESS or ISC_R_EXISTS releases the write lock.
 *
 * Every node handed back must be tagged DNS_RBT_NSEC_NSEC3 (INSIST).
 * A reference is taken with new_reference() while holding the node
 * strong lock, after which the tree lock is released.  Returns
 * ISC_R_SUCCESS with '*nodep' set on the normal path.
 */
2529 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2530 dns_dbnode_t **nodep)
2532 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2533 dns_rbtnode_t *node = NULL;
2534 dns_name_t nodename;
2535 isc_result_t result;
2536 isc_rwlocktype_t locktype = isc_rwlocktype_read;
2538 REQUIRE(VALID_RBTDB(rbtdb));
2540 dns_name_init(&nodename, NULL);
2541 RWLOCK(&rbtdb->tree_lock, locktype);
2542 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2543 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2544 if (result != ISC_R_SUCCESS) {
2545 RWUNLOCK(&rbtdb->tree_lock, locktype);
2547 if (result == DNS_R_PARTIALMATCH)
2548 result = ISC_R_NOTFOUND;
2552 * It would be nice to try to upgrade the lock instead of
2553 * unlocking then relocking.
2555 locktype = isc_rwlocktype_write;
2556 RWLOCK(&rbtdb->tree_lock, locktype);
2558 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2559 if (result == ISC_R_SUCCESS) {
2560 dns_rbt_namefromnode(node, &nodename);
2561 #ifdef DNS_RBT_USEHASH
2562 node->locknum = node->hashval % rbtdb->node_lock_count;
2564 node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2565 rbtdb->node_lock_count;
2567 node->nsec = DNS_RBT_NSEC_NSEC3;
2568 } else if (result != ISC_R_EXISTS) {
2569 RWUNLOCK(&rbtdb->tree_lock, locktype);
2573 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
/* Take the caller's reference under the node lock, tree lock still held. */
2575 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2576 new_reference(rbtdb, node);
2577 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2578 RWUNLOCK(&rbtdb->tree_lock, locktype);
2580 *nodep = (dns_dbnode_t *)node;
2582 return (ISC_R_SUCCESS);
/*
 * zone_zonecut_callback: node callback that records the topmost zone cut
 * met during a search.  'arg' is the rbtdb_search_t for the search and
 * 'name' is the name of 'node'.
 *
 * If search->zonecut is already set, DNS_R_CONTINUE is returned at once.
 * Otherwise the node read lock is taken and the rdataset headers at the
 * node are scanned for NS, DNAME and RBTDB_RDATATYPE_SIGDNAME entries
 * whose serial is <= search->serial.  An NS header is only considered
 * when the node is not the origin node or the database is a stub
 * (IS_STUB).  DNAME takes precedence over NS; the SIGDNAME header is
 * remembered as the signature only for a DNAME cut.
 *
 * When a cut is found: the node gains a reference via new_reference(),
 * search->zonecut, search->zonecut_rdataset and search->need_cleanup
 * are set, and search->wild is cleared.  If DNS_DBFIND_GLUEOK is not
 * set the result becomes DNS_R_PARTIALMATCH; the alternative path
 * copies 'name' into search->zonecut_name and sets search->copy_name.
 *
 * When no cut is found, search->wild is set if node->wild is set and
 * DNS_DBFIND_NOWILD was not requested.
 *
 * The node read lock is released before returning.
 */
2586 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2587 rbtdb_search_t *search = arg;
2588 rdatasetheader_t *header, *header_next;
2589 rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2590 rdatasetheader_t *found;
2591 isc_result_t result;
2592 dns_rbtnode_t *onode;
2595 * We only want to remember the topmost zone cut, since it's the one
2596 * that counts, so we'll just continue if we've already found a
2599 if (search->zonecut != NULL)
2600 return (DNS_R_CONTINUE);
2603 result = DNS_R_CONTINUE;
2604 onode = search->rbtdb->origin_node;
2606 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2607 isc_rwlocktype_read);
2610 * Look for an NS or DNAME rdataset active in our version.
2613 dname_header = NULL;
2614 sigdname_header = NULL;
2615 for (header = node->data; header != NULL; header = header_next) {
2616 header_next = header->next;
2617 if (header->type == dns_rdatatype_ns ||
2618 header->type == dns_rdatatype_dname ||
2619 header->type == RBTDB_RDATATYPE_SIGDNAME) {
2621 if (header->serial <= search->serial &&
2624 * Is this a "this rdataset doesn't
2627 if (NONEXISTENT(header))
2631 header = header->down;
2632 } while (header != NULL);
2633 if (header != NULL) {
2634 if (header->type == dns_rdatatype_dname)
2635 dname_header = header;
2636 else if (header->type ==
2637 RBTDB_RDATATYPE_SIGDNAME)
2638 sigdname_header = header;
2639 else if (node != onode ||
2640 IS_STUB(search->rbtdb)) {
2642 * We've found an NS rdataset that
2643 * isn't at the origin node. We check
2644 * that they're not at the origin node,
2645 * because otherwise we'd erroneously
2646 * treat the zone top as if it were
2656 * Did we find anything?
2658 if (dname_header != NULL) {
2660 * Note that DNAME has precedence over NS if both exist.
2662 found = dname_header;
2663 search->zonecut_sigrdataset = sigdname_header;
2664 } else if (ns_header != NULL) {
2666 search->zonecut_sigrdataset = NULL;
2669 if (found != NULL) {
2671 * We increment the reference count on node to ensure that
2672 * search->zonecut_rdataset will still be valid later.
2674 new_reference(search->rbtdb, node);
2675 search->zonecut = node;
2676 search->zonecut_rdataset = found;
2677 search->need_cleanup = ISC_TRUE;
2679 * Since we've found a zonecut, anything beneath it is
2680 * glue and is not subject to wildcard matching, so we
2681 * may clear search->wild.
2683 search->wild = ISC_FALSE;
2684 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2686 * If the caller does not want to find glue, then
2687 * this is the best answer and the search should
2690 result = DNS_R_PARTIALMATCH;
2695 * The search will continue beneath the zone cut.
2696 * This may or may not be the best match. In case it
2697 * is, we need to remember the node name.
2699 zcname = dns_fixedname_name(&search->zonecut_name);
2700 RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2702 search->copy_name = ISC_TRUE;
2706 * There is no zonecut at this node which is active in this
2709 * If this is a "wild" node and the caller hasn't disabled
2710 * wildcard matching, remember that we've seen a wild node
2711 * in case we need to go searching for wildcard matches
2714 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2715 search->wild = ISC_TRUE;
2718 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2719 isc_rwlocktype_read);
/*
 * bind_rdataset: associate 'rdataset' with the rdata slab that follows
 * 'header' in memory, on behalf of 'node' in 'rbtdb'.
 *
 * A NULL 'rdataset' is checked for first (presumably an early return).
 * Otherwise a new reference is taken on 'node', 'rdataset' is required
 * to be disassociated (methods == NULL), and these fields are filled:
 *   - methods, rdclass (from rbtdb->common.rdclass);
 *   - type / covers (base and extended parts of header->type);
 *   - ttl = header->rdh_ttl - now; trust = header->trust;
 *   - NXDOMAIN / OPTOUT attribute bits;
 *   - private1 = rbtdb, private2 = node, private3 = start of the slab
 *     (the byte immediately after the header structure);
 *   - count = header->count, which is post-incremented; a value of
 *     ISC_UINT32_MAX is reported as 0;
 *   - iterator state (privateuint4, private5) reset;
 *   - private6 / private7 = header->noqname / header->closest, with the
 *     NOQNAME / CLOSEST attribute bits set when those are non-NULL;
 *   - resign information: the RESIGN attribute and header->resign when
 *     RESIGN(header) holds, 0 on the other path.
 */
2725 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2726 rdatasetheader_t *header, isc_stdtime_t now,
2727 dns_rdataset_t *rdataset)
2729 unsigned char *raw; /* RDATASLAB */
2732 * Caller must be holding the node reader lock.
2733 * XXXJT: technically, we need a writer lock, since we'll increment
2734 * the header count below. However, since the actual counter value
2735 * doesn't matter, we prioritize performance here. (We may want to
2736 * use atomic increment when available).
2739 if (rdataset == NULL)
2742 new_reference(rbtdb, node);
2744 INSIST(rdataset->methods == NULL); /* We must be disassociated. */
2746 rdataset->methods = &rdataset_methods;
2747 rdataset->rdclass = rbtdb->common.rdclass;
2748 rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2749 rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2750 rdataset->ttl = header->rdh_ttl - now;
2751 rdataset->trust = header->trust;
2752 if (NXDOMAIN(header))
2753 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2755 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2756 rdataset->private1 = rbtdb;
2757 rdataset->private2 = node;
2758 raw = (unsigned char *)header + sizeof(*header);
2759 rdataset->private3 = raw;
2760 rdataset->count = header->count++;
2761 if (rdataset->count == ISC_UINT32_MAX)
2762 rdataset->count = 0;
2765 * Reset iterator state.
2767 rdataset->privateuint4 = 0;
2768 rdataset->private5 = NULL;
2771 * Add noqname proof.
2773 rdataset->private6 = header->noqname;
2774 if (rdataset->private6 != NULL)
2775 rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
2776 rdataset->private7 = header->closest;
2777 if (rdataset->private7 != NULL)
2778 rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
2781 * Copy out re-signing information.
2783 if (RESIGN(header)) {
2784 rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
2785 rdataset->resign = header->resign;
2787 rdataset->resign = 0;
/*
 * setup_delegation: report the zone cut recorded in 'search' to the
 * caller.
 *
 * The node is search->zonecut and the result is chosen from the type of
 * search->zonecut_rdataset: DNS_R_DNAME for a DNAME cut,
 * DNS_R_DELEGATION otherwise.
 *
 *   - 'foundname': if non-NULL and search->copy_name is set,
 *     search->zonecut_name is copied into it before anything else, and
 *     the copy result is checked.
 *   - 'nodep': if non-NULL, the reference already held by the search is
 *     reused for the caller and search->need_cleanup is cleared.
 *   - 'rdataset': if non-NULL, the cut rdataset is bound with
 *     bind_rdataset() under the node read lock; 'sigrdataset' is bound
 *     as well when both it and search->zonecut_sigrdataset are non-NULL.
 */
2790 static inline isc_result_t
2791 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2792 dns_name_t *foundname, dns_rdataset_t *rdataset,
2793 dns_rdataset_t *sigrdataset)
2795 isc_result_t result;
2797 rbtdb_rdatatype_t type;
2798 dns_rbtnode_t *node;
2801 * The caller MUST NOT be holding any node locks.
2804 node = search->zonecut;
2805 type = search->zonecut_rdataset->type;
2808 * If we have to set foundname, we do it before anything else.
2809 * If we were to set foundname after we had set nodep or bound the
2810 * rdataset, then we'd have to undo that work if dns_name_copy()
2811 * failed. By setting foundname first, there's nothing to undo if
2814 if (foundname != NULL && search->copy_name) {
2815 zcname = dns_fixedname_name(&search->zonecut_name);
2816 result = dns_name_copy(zcname, foundname, NULL);
2817 if (result != ISC_R_SUCCESS)
2820 if (nodep != NULL) {
2822 * Note that we don't have to increment the node's reference
2823 * count here because we're going to use the reference we
2824 * already have in the search block.
2827 search->need_cleanup = ISC_FALSE;
2829 if (rdataset != NULL) {
2830 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2831 isc_rwlocktype_read);
2832 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2833 search->now, rdataset);
2834 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2835 bind_rdataset(search->rbtdb, node,
2836 search->zonecut_sigrdataset,
2837 search->now, sigrdataset);
2838 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2839 isc_rwlocktype_read);
2842 if (type == dns_rdatatype_dname)
2843 return (DNS_R_DNAME);
2844 return (DNS_R_DELEGATION);
/*
 * valid_glue: decide whether an rdataset of 'type' at 'node', owned by
 * 'name', may be treated as glue for the zone cut held in 'search'.
 *
 * Type filter: NS is checked against the zone cut node itself
 * (node != search->zonecut is the rejecting test); any other type must
 * be A, AAAA or A6.
 *
 * The zone cut rdataset (search->zonecut_rdataset) is then read directly
 * in slab form: the first two bytes after the header are the big-endian
 * record count and each record is preceded by a two-byte big-endian
 * length.  With DNS_RDATASET_FIXED the slab carries extra per-record
 * bytes that are skipped.  Each record is parsed as a name ('ns_name')
 * and compared with 'name' using dns_name_compare().
 *
 * 'valid' starts as ISC_FALSE; presumably it is set on a match and
 * returned -- confirm against the full source.
 */
2847 static inline isc_boolean_t
2848 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2849 dns_rbtnode_t *node)
2851 unsigned char *raw; /* RDATASLAB */
2852 unsigned int count, size;
2854 isc_boolean_t valid = ISC_FALSE;
2855 dns_offsets_t offsets;
2856 isc_region_t region;
2857 rdatasetheader_t *header;
2860 * No additional locking is required.
2864 * Valid glue types are A, AAAA, A6. NS is also a valid glue type
2865 * if it occurs at a zone cut, but is not valid below it.
2867 if (type == dns_rdatatype_ns) {
2868 if (node != search->zonecut) {
2871 } else if (type != dns_rdatatype_a &&
2872 type != dns_rdatatype_aaaa &&
2873 type != dns_rdatatype_a6) {
2877 header = search->zonecut_rdataset;
2878 raw = (unsigned char *)header + sizeof(*header);
/* Record count: two bytes, most significant first. */
2879 count = raw[0] * 256 + raw[1];
2880 #if DNS_RDATASET_FIXED
2881 raw += 2 + (4 * count);
/* Length of the current record: two bytes, most significant first. */
2888 size = raw[0] * 256 + raw[1];
2889 #if DNS_RDATASET_FIXED
2895 region.length = size;
2898 * XXX Until we have rdata structures, we have no choice but
2899 * to directly access the rdata format.
2901 dns_name_init(&ns_name, offsets);
2902 dns_name_fromregion(&ns_name, ®ion);
2903 if (dns_name_compare(&ns_name, name) == 0) {
/*
 * activeempty: test whether 'name' lies above the next active node that
 * follows the current position of 'chain'.
 *
 * The chain is advanced with dns_rbtnodechain_next() and each node is
 * inspected under its node read lock for a header that is visible at
 * search->serial (serial <= search->serial), not IGNOREd, and EXISTS.
 * The full name of the node where the scan stops is rebuilt by
 * concatenating 'prefix' and 'origin' into 'next', and that name is
 * tested with dns_name_issubdomain(next, name).
 *
 * 'answer' starts as ISC_FALSE; presumably it is set when the subdomain
 * test holds and is the return value -- confirm against the full source.
 *
 * Note that 'chain' is modified; callers that need their position kept
 * must pass a copy.
 */
2912 static inline isc_boolean_t
2913 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2916 dns_fixedname_t fnext;
2917 dns_fixedname_t forigin;
2922 dns_rbtnode_t *node;
2923 isc_result_t result;
2924 isc_boolean_t answer = ISC_FALSE;
2925 rdatasetheader_t *header;
2927 rbtdb = search->rbtdb;
2929 dns_name_init(&prefix, NULL);
2930 dns_fixedname_init(&fnext);
2931 next = dns_fixedname_name(&fnext);
2932 dns_fixedname_init(&forigin);
2933 origin = dns_fixedname_name(&forigin);
2935 result = dns_rbtnodechain_next(chain, NULL, NULL);
2936 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2938 result = dns_rbtnodechain_current(chain, &prefix,
2940 if (result != ISC_R_SUCCESS)
2942 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2943 isc_rwlocktype_read);
2944 for (header = node->data;
2946 header = header->next) {
2947 if (header->serial <= search->serial &&
2948 !IGNORE(header) && EXISTS(header))
2951 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2952 isc_rwlocktype_read);
2955 result = dns_rbtnodechain_next(chain, NULL, NULL);
/* Rebuild the absolute name of the node the scan stopped at. */
2957 if (result == ISC_R_SUCCESS)
2958 result = dns_name_concatenate(&prefix, origin, next, NULL);
2959 if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
/*
 * activeemtpynode: check whether 'qname' is at or below an empty node
 * that has active data beneath it, relative to the wildcard name 'wname'.
 * (The spelling of the function name is as used by its caller.)
 *
 * Works on a private copy of search->chain so the search position is
 * not disturbed.
 *
 *   1. Scan backwards (dns_rbtnodechain_prev) from the current position
 *      for a node with a header visible at search->serial that is not
 *      IGNOREd and EXISTS; its absolute name is built into 'prev'.  If
 *      that fails, 'check_prev' is cleared.
 *   2. Scan forwards (dns_rbtnodechain_next) the same way, building
 *      'next'; on failure 'check_next' is cleared.
 *   3. Starting from a clone of 'qname' ('rname'), repeatedly test
 *      whether 'prev' or 'next' is a subdomain of 'rname', stripping the
 *      leftmost label of 'rname' each round, until 'rname' equals
 *      'wname' with its wildcard label removed ('tname').
 *
 * 'answer' starts as ISC_FALSE; presumably it is set when a subdomain
 * test in step 3 holds and is the return value -- confirm against the
 * full source.
 */
2964 static inline isc_boolean_t
2965 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2966 dns_fixedname_t fnext;
2967 dns_fixedname_t forigin;
2968 dns_fixedname_t fprev;
2976 dns_rbtnode_t *node;
2977 dns_rbtnodechain_t chain;
2978 isc_boolean_t check_next = ISC_TRUE;
2979 isc_boolean_t check_prev = ISC_TRUE;
2980 isc_boolean_t answer = ISC_FALSE;
2981 isc_result_t result;
2982 rdatasetheader_t *header;
2985 rbtdb = search->rbtdb;
2987 dns_name_init(&name, NULL);
2988 dns_name_init(&tname, NULL);
2989 dns_name_init(&rname, NULL);
2990 dns_fixedname_init(&fnext);
2991 next = dns_fixedname_name(&fnext);
2992 dns_fixedname_init(&fprev);
2993 prev = dns_fixedname_name(&fprev);
2994 dns_fixedname_init(&forigin);
2995 origin = dns_fixedname_name(&forigin);
2998 * Find if qname is at or below a empty node.
2999 * Use our own copy of the chain.
3002 chain = search->chain;
3005 result = dns_rbtnodechain_current(&chain, &name,
3007 if (result != ISC_R_SUCCESS)
3009 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3010 isc_rwlocktype_read);
3011 for (header = node->data;
3013 header = header->next) {
3014 if (header->serial <= search->serial &&
3015 !IGNORE(header) && EXISTS(header))
3018 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3019 isc_rwlocktype_read);
3022 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3023 } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3024 if (result == ISC_R_SUCCESS)
3025 result = dns_name_concatenate(&name, origin, prev, NULL);
3026 if (result != ISC_R_SUCCESS)
3027 check_prev = ISC_FALSE;
/* Second pass: same activity test, walking forwards this time. */
3029 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3030 while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3032 result = dns_rbtnodechain_current(&chain, &name,
3034 if (result != ISC_R_SUCCESS)
3036 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3037 isc_rwlocktype_read);
3038 for (header = node->data;
3040 header = header->next) {
3041 if (header->serial <= search->serial &&
3042 !IGNORE(header) && EXISTS(header))
3045 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3046 isc_rwlocktype_read);
3049 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3051 if (result == ISC_R_SUCCESS)
3052 result = dns_name_concatenate(&name, origin, next, NULL);
3053 if (result != ISC_R_SUCCESS)
3054 check_next = ISC_FALSE;
3056 dns_name_clone(qname, &rname);
3059 * Remove the wildcard label to find the terminal name.
3061 n = dns_name_countlabels(wname);
3062 dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3065 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3066 (check_next && dns_name_issubdomain(next, &rname))) {
3071 * Remove the left hand label.
3073 n = dns_name_countlabels(&rname);
3074 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3075 } while (!dns_name_equal(&rname, &tname));
/*
 * find_wildcard: look for a wildcard node that matches the query, walking
 * the ancestor levels recorded in search->chain, starting from
 * search->chain.level_matches.
 *
 * For each level, under the node read lock, the node is tested for a
 * header visible at search->serial that is not IGNOREd and EXISTS.  A
 * wildcard name for the level is then built by concatenating
 * dns_wildcardname with the node name and the names of the enclosing
 * chain levels, and looked up in rbtdb->tree with DNS_RBTFIND_EMPTYDATA.
 *
 * If the wildcard node is found and either has an active header or
 * activeempty() holds for it, activeemtpynode() gets a veto: when it
 * holds, ISC_R_NOTFOUND is returned; otherwise the wildcard node is the
 * match and the result stays ISC_R_SUCCESS.  Lookup results other than
 * ISC_R_NOTFOUND and DNS_R_PARTIALMATCH are treated as errors.  An
 * active level node ends the walk with ISC_R_NOTFOUND.
 *
 * The initial result is ISC_R_NOTFOUND.
 */
3079 static inline isc_result_t
3080 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3084 dns_rbtnode_t *node, *level_node, *wnode;
3085 rdatasetheader_t *header;
3086 isc_result_t result = ISC_R_NOTFOUND;
3089 dns_fixedname_t fwname;
3091 isc_boolean_t done, wild, active;
3092 dns_rbtnodechain_t wchain;
3095 * Caller must be holding the tree lock and MUST NOT be holding
3100 * Examine each ancestor level. If the level's wild bit
3101 * is set, then construct the corresponding wildcard name and
3102 * search for it. If the wildcard node exists, and is active in
3103 * this version, we're done. If not, then we next check to see
3104 * if the ancestor is active in this version. If so, then there
3105 * can be no possible wildcard match and again we're done. If not,
3106 * continue the search.
3109 rbtdb = search->rbtdb;
3110 i = search->chain.level_matches;
3114 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3115 isc_rwlocktype_read);
3118 * First we try to figure out if this node is active in
3119 * the search's version. We do this now, even though we
3120 * may not need the information, because it simplifies the
3121 * locking and code flow.
3123 for (header = node->data;
3125 header = header->next) {
3126 if (header->serial <= search->serial &&
3127 !IGNORE(header) && EXISTS(header))
3140 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3141 isc_rwlocktype_read);
3145 * Construct the wildcard name for this level.
3147 dns_name_init(&name, NULL);
3148 dns_rbt_namefromnode(node, &name);
3149 dns_fixedname_init(&fwname);
3150 wname = dns_fixedname_name(&fwname);
3151 result = dns_name_concatenate(dns_wildcardname, &name,
3154 while (result == ISC_R_SUCCESS && j != 0) {
3156 level_node = search->chain.levels[j];
3157 dns_name_init(&name, NULL);
3158 dns_rbt_namefromnode(level_node, &name);
3159 result = dns_name_concatenate(wname,
3164 if (result != ISC_R_SUCCESS)
3168 dns_rbtnodechain_init(&wchain, NULL);
3169 result = dns_rbt_findnode(rbtdb->tree, wname,
3170 NULL, &wnode, &wchain,
3171 DNS_RBTFIND_EMPTYDATA,
3173 if (result == ISC_R_SUCCESS) {
3177 * We have found the wildcard node. If it
3178 * is active in the search's version, we're
3181 lock = &rbtdb->node_locks[wnode->locknum].lock;
3182 NODE_LOCK(lock, isc_rwlocktype_read);
3183 for (header = wnode->data;
3185 header = header->next) {
3186 if (header->serial <= search->serial &&
3187 !IGNORE(header) && EXISTS(header))
3190 NODE_UNLOCK(lock, isc_rwlocktype_read);
3191 if (header != NULL ||
3192 activeempty(search, &wchain, wname)) {
3193 if (activeemtpynode(search, qname,
3195 return (ISC_R_NOTFOUND);
3198 * The wildcard node is active!
3200 * Note: result is still ISC_R_SUCCESS
3201 * so we don't have to set it.
3206 } else if (result != ISC_R_NOTFOUND &&
3207 result != DNS_R_PARTIALMATCH) {
3209 * An error has occurred. Bail out.
3217 * The level node is active. Any wildcarding
3218 * present at higher levels has no
3219 * effect and we're done.
3221 result = ISC_R_NOTFOUND;
3227 node = search->chain.levels[i];
/*
 * matchparams: test whether an NSEC3 rdataset uses the same NSEC3
 * parameters as the version being searched.
 *
 * 'header' must be of type dns_rdatatype_nsec3 (REQUIRE).  The slab that
 * follows the header is walked record by record: the first two bytes are
 * the big-endian record count and each record is preceded by a two-byte
 * big-endian length (with extra bytes skipped when DNS_RDATASET_FIXED is
 * set).  Each record is converted with dns_rdata_tostruct() and its
 * hash, iterations, salt_length and salt bytes are compared with those
 * of search->rbtversion.
 *
 * Presumably returns ISC_TRUE on the first record whose parameters all
 * match and ISC_FALSE when none does -- confirm against the full source.
 */
3235 static isc_boolean_t
3236 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3238 dns_rdata_t rdata = DNS_RDATA_INIT;
3239 dns_rdata_nsec3_t nsec3;
3240 unsigned char *raw; /* RDATASLAB */
3241 unsigned int rdlen, count;
3242 isc_region_t region;
3243 isc_result_t result;
3245 REQUIRE(header->type == dns_rdatatype_nsec3);
3247 raw = (unsigned char *)header + sizeof(*header);
3248 count = raw[0] * 256 + raw[1]; /* count */
3249 #if DNS_RDATASET_FIXED
3250 raw += count * 4 + 2;
3254 while (count-- > 0) {
/* Length of this record: two bytes, most significant first. */
3255 rdlen = raw[0] * 256 + raw[1];
3256 #if DNS_RDATASET_FIXED
3262 region.length = rdlen;
3263 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3264 dns_rdatatype_nsec3, ®ion);
3266 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3267 INSIST(result == ISC_R_SUCCESS);
/* All four parameters must agree; the salt is compared byte-wise. */
3268 if (nsec3.hash == search->rbtversion->hash &&
3269 nsec3.iterations == search->rbtversion->iterations &&
3270 nsec3.salt_length == search->rbtversion->salt_length &&
3271 memcmp(nsec3.salt, search->rbtversion->salt,
3272 nsec3.salt_length) == 0)
/* Reset 'rdata' so it can be reused for the next record. */
3274 dns_rdata_reset(&rdata);
/*
 * previous_closest_nsec: step the search back to the previous candidate
 * node when looking for the closest NSEC/NSEC3 record.
 *
 * NSEC3 ('type' == dns_rdatatype_nsec3): search->chain is simply moved
 * to its predecessor (DNS_R_NEWORIGIN is tolerated) and the current
 * node name is read back into 'name' / 'origin'.  ISC_R_SUCCESS is
 * returned when both steps succeed.
 *
 * Otherwise the auxiliary NSEC tree (search->rbtdb->nsec) is used, with
 * 'nsecchain' as the position in it and '*firstp' marking the first
 * call:
 *   - First call: '*firstp' is cleared, 'nsecchain' is initialised, and
 *     the concatenation of 'name' and 'origin' is looked up in the NSEC
 *     tree.  On an exact hit the chain is moved to the previous entry;
 *     on ISC_R_NOTFOUND / DNS_R_PARTIALMATCH the chain's current entry
 *     is used, with ISC_R_NOTFOUND mapped to ISC_R_NOMORE.
 *   - Later calls: the chain is moved to the previous entry.
 *   DNS_R_NEWORIGIN is folded into ISC_R_SUCCESS in both cases.
 *
 * The resulting name ('name' + 'origin' -> 'target') is then looked up
 * in the main tree (search->rbtdb->tree), repositioning search->chain
 * and filling '*nodep'.  DNS_R_PARTIALMATCH is treated as
 * ISC_R_NOTFOUND; any other failure is logged and reported as
 * DNS_R_BADDB.
 */
3279 static inline isc_result_t
3280 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3281 dns_name_t *name, dns_name_t *origin,
3282 dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3283 isc_boolean_t *firstp)
3285 dns_fixedname_t ftarget;
3287 dns_rbtnode_t *nsecnode;
3288 isc_result_t result;
3290 if (type == dns_rdatatype_nsec3) {
3291 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3292 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
3294 result = dns_rbtnodechain_current(&search->chain, name, origin,
3296 if (result != ISC_R_SUCCESS)
3298 return (ISC_R_SUCCESS);
3301 dns_fixedname_init(&ftarget);
3302 target = dns_fixedname_name(&ftarget);
3307 * Construct the name of the second node to check.
3308 * It is the first node sought in the NSEC tree.
3310 *firstp = ISC_FALSE;
3311 dns_rbtnodechain_init(nsecchain, NULL);
3312 result = dns_name_concatenate(name, origin,
3314 if (result != ISC_R_SUCCESS)
3317 result = dns_rbt_findnode(search->rbtdb->nsec,
3319 &nsecnode, nsecchain,
3320 DNS_RBTFIND_NOOPTIONS,
3322 if (result == ISC_R_SUCCESS) {
3324 * Since this was the first loop, finding the
3325 * name in the NSEC tree implies that the first
3326 * node checked in the main tree had an
3327 * unacceptable NSEC record.
3328 * Try the previous node in the NSEC tree.
3330 result = dns_rbtnodechain_prev(nsecchain,
3332 if (result == DNS_R_NEWORIGIN)
3333 result = ISC_R_SUCCESS;
3334 } else if (result == ISC_R_NOTFOUND
3335 || result == DNS_R_PARTIALMATCH) {
3336 result = dns_rbtnodechain_current(nsecchain,
3337 name, origin, NULL);
3338 if (result == ISC_R_NOTFOUND)
3339 result = ISC_R_NOMORE;
3343 * This is a second or later trip through the auxiliary
3344 * tree for the name of a third or earlier NSEC node in
3345 * the main tree. Previous trips through the NSEC tree
3346 * must have found nodes in the main tree with NSEC
3347 * records. Perhaps they lacked signature records.
3349 result = dns_rbtnodechain_prev(nsecchain, name, origin);
3350 if (result == DNS_R_NEWORIGIN)
3351 result = ISC_R_SUCCESS;
3352 if (result != ISC_R_SUCCESS)
3355 if (result != ISC_R_SUCCESS)
3359 * Construct the name to seek in the main tree.
3361 result = dns_name_concatenate(name, origin, target, NULL);
3362 if (result != ISC_R_SUCCESS)
3366 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3367 nodep, &search->chain,
3368 DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3369 if (result == ISC_R_SUCCESS)
3373 * There should always be a node in the main tree with the
3374 * same name as the node in the auxiliary NSEC tree, except for
3375 * nodes in the auxiliary tree that are awaiting deletion.
3377 if (result == DNS_R_PARTIALMATCH)
3378 result = ISC_R_NOTFOUND;
/* Anything other than "not found" at this point is reported as a bad database. */
3380 if (result != ISC_R_NOTFOUND) {
3381 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3382 DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3383 "previous_closest_nsec(): %s",
3384 isc_result_totext(result));
3385 return (DNS_R_BADDB);
/*
 * find_closest_nsec: starting at the current position of search->chain,
 * walk backwards through 'tree' to the nearest node that carries a
 * usable NSEC (or NSEC3) record, and hand it to the caller.
 *
 * The record type sought is NSEC3 with RBTDB_RDATATYPE_SIGNSEC3 when
 * 'tree' is the NSEC3 tree of the database, NSEC with
 * RBTDB_RDATATYPE_SIGNSEC otherwise.  A signature is required
 * ('need_sig') only when 'secure' is dns_db_secure.
 *
 * Each node visited is scanned under its node read lock for headers
 * visible at search->serial:
 *   - an NSEC3 found while the version has NSEC3 parameters
 *     (havensec3) that do not match (matchparams() fails) is treated as
 *     an empty node and the chain is stepped back;
 *   - a record with its signature (or without one when none is needed)
 *     is the answer: the node name is concatenated from 'name' and
 *     'origin', a reference is taken for 'nodep' when it is non-NULL,
 *     and the rdataset and signature are bound with bind_rdataset();
 *   - an active node with neither record is treated as empty and the
 *     walk continues through previous_closest_nsec();
 *   - an active node with only one of the two records yields
 *     DNS_R_BADDB;
 *   - an inactive node continues the walk through
 *     previous_closest_nsec().
 * The walk repeats while the node was empty and the result is
 * ISC_R_SUCCESS.
 *
 * On ISC_R_NOMORE with 'wraps' set, the chain is repositioned at the
 * last node of 'tree' with dns_rbtnodechain_last() (presumably to
 * retry from the end -- confirm against the full source).  A final
 * ISC_R_NOMORE is reported as DNS_R_BADDB.
 */
3390 static inline isc_result_t
3391 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3392 dns_name_t *foundname, dns_rdataset_t *rdataset,
3393 dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3394 dns_db_secure_t secure)
3396 dns_rbtnode_t *node, *prevnode;
3397 rdatasetheader_t *header, *header_next, *found, *foundsig;
3398 dns_rbtnodechain_t nsecchain;
3399 isc_boolean_t empty_node;
3400 isc_result_t result;
3401 dns_fixedname_t fname, forigin;
3402 dns_name_t *name, *origin;
3403 dns_rdatatype_t type;
3404 rbtdb_rdatatype_t sigtype;
3405 isc_boolean_t wraps;
3406 isc_boolean_t first = ISC_TRUE;
3407 isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
/* Pick the record and signature types from the tree being searched. */
3409 if (tree == search->rbtdb->nsec3) {
3410 type = dns_rdatatype_nsec3;
3411 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3414 type = dns_rdatatype_nsec;
3415 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3420 * Use the auxiliary tree only starting with the second node in the
3421 * hope that the original node will be right much of the time.
3423 dns_fixedname_init(&fname);
3424 name = dns_fixedname_name(&fname);
3425 dns_fixedname_init(&forigin);
3426 origin = dns_fixedname_name(&forigin);
3429 result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3430 if (result != ISC_R_SUCCESS)
3433 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3434 isc_rwlocktype_read);
3437 empty_node = ISC_TRUE;
3438 for (header = node->data;
3440 header = header_next) {
3441 header_next = header->next;
3443 * Look for an active, extant NSEC or RRSIG NSEC.
3446 if (header->serial <= search->serial &&
3449 * Is this a "this rdataset doesn't
3452 if (NONEXISTENT(header))
3456 header = header->down;
3457 } while (header != NULL);
3458 if (header != NULL) {
3460 * We now know that there is at least one
3461 * active rdataset at this node.
3463 empty_node = ISC_FALSE;
3464 if (header->type == type) {
3466 if (foundsig != NULL)
3468 } else if (header->type == sigtype) {
3476 if (found != NULL && search->rbtversion->havensec3 &&
3477 found->type == dns_rdatatype_nsec3 &&
3478 !matchparams(found, search)) {
3479 empty_node = ISC_TRUE;
3482 result = dns_rbtnodechain_prev(&search->chain,
3484 } else if (found != NULL &&
3485 (foundsig != NULL || !need_sig)) {
3487 * We've found the right NSEC/NSEC3 record.
3489 * Note: for this to really be the right
3490 * NSEC record, it's essential that the NSEC
3491 * records of any nodes obscured by a zone
3492 * cut have been removed; we assume this is
3495 result = dns_name_concatenate(name, origin,
3497 if (result == ISC_R_SUCCESS) {
3498 if (nodep != NULL) {
3499 new_reference(search->rbtdb,
3503 bind_rdataset(search->rbtdb, node,
3506 if (foundsig != NULL)
3507 bind_rdataset(search->rbtdb,
3513 } else if (found == NULL && foundsig == NULL) {
3515 * This node is active, but has no NSEC or
3516 * RRSIG NSEC. That means it's glue or
3517 * other obscured zone data that isn't
3518 * relevant for our search. Treat the
3519 * node as if it were empty and keep looking.
3521 empty_node = ISC_TRUE;
3522 result = previous_closest_nsec(type, search,
3523 name, origin, &prevnode,
3524 &nsecchain, &first);
3527 * We found an active node, but either the
3528 * NSEC or the RRSIG NSEC is missing. This
3531 result = DNS_R_BADDB;
3535 * This node isn't active. We've got to keep
3538 result = previous_closest_nsec(type, search,
3539 name, origin, &prevnode,
3540 &nsecchain, &first);
3542 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3543 isc_rwlocktype_read);
3545 } while (empty_node && result == ISC_R_SUCCESS);
3548 dns_rbtnodechain_invalidate(&nsecchain);
3550 if (result == ISC_R_NOMORE && wraps) {
3551 result = dns_rbtnodechain_last(&search->chain, tree,
3553 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3560 * If the result is ISC_R_NOMORE, then we got to the beginning of
3561 * the database and didn't find a NSEC record. This shouldn't
3564 if (result == ISC_R_NOMORE)
3565 result = DNS_R_BADDB;
3571 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3572 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3573 dns_dbnode_t **nodep, dns_name_t *foundname,
3574 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3576 dns_rbtnode_t *node = NULL;
3577 isc_result_t result;
3578 rbtdb_search_t search;
3579 isc_boolean_t cname_ok = ISC_TRUE;
3580 isc_boolean_t close_version = ISC_FALSE;
3581 isc_boolean_t maybe_zonecut = ISC_FALSE;
3582 isc_boolean_t at_zonecut = ISC_FALSE;
3584 isc_boolean_t empty_node;
3585 rdatasetheader_t *header, *header_next, *found, *nsecheader;
3586 rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3587 rbtdb_rdatatype_t sigtype;
3588 isc_boolean_t active;
3589 dns_rbtnodechain_t chain;
3593 search.rbtdb = (dns_rbtdb_t *)db;
3595 REQUIRE(VALID_RBTDB(search.rbtdb));
3598 * We don't care about 'now'.
3603 * If the caller didn't supply a version, attach to the current
3606 if (version == NULL) {
3607 currentversion(db, &version);
3608 close_version = ISC_TRUE;
3611 search.rbtversion = version;
3612 search.serial = search.rbtversion->serial;
3613 search.options = options;
3614 search.copy_name = ISC_FALSE;
3615 search.need_cleanup = ISC_FALSE;
3616 search.wild = ISC_FALSE;
3617 search.zonecut = NULL;
3618 dns_fixedname_init(&search.zonecut_name);
3619 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3623 * 'wild' will be true iff. we've matched a wildcard.
3627 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3630 * Search down from the root of the tree. If, while going down, we
3631 * encounter a callback node, zone_zonecut_callback() will search the
3632 * rdatasets at the zone cut for active DNAME or NS rdatasets.
3634 tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3636 result = dns_rbt_findnode(tree, name, foundname, &node,
3637 &search.chain, DNS_RBTFIND_EMPTYDATA,
3638 zone_zonecut_callback, &search);
3640 if (result == DNS_R_PARTIALMATCH) {
3642 if (search.zonecut != NULL) {
3643 result = setup_delegation(&search, nodep, foundname,
3644 rdataset, sigrdataset);
3650 * At least one of the levels in the search chain
3651 * potentially has a wildcard. For each such level,
3652 * we must see if there's a matching wildcard active
3653 * in the current version.
3655 result = find_wildcard(&search, &node, name);
3656 if (result == ISC_R_SUCCESS) {
3657 result = dns_name_copy(name, foundname, NULL);
3658 if (result != ISC_R_SUCCESS)
3663 else if (result != ISC_R_NOTFOUND)
3667 chain = search.chain;
3668 active = activeempty(&search, &chain, name);
3671 * If we're here, then the name does not exist, is not
3672 * beneath a zonecut, and there's no matching wildcard.
3674 if ((search.rbtversion->secure == dns_db_secure &&
3675 !search.rbtversion->havensec3) ||
3676 (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3677 (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3679 result = find_closest_nsec(&search, nodep, foundname,
3680 rdataset, sigrdataset, tree,
3681 search.rbtversion->secure);
3682 if (result == ISC_R_SUCCESS)
3683 result = active ? DNS_R_EMPTYNAME :
3686 result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3688 } else if (result != ISC_R_SUCCESS)
3693 * We have found a node whose name is the desired name, or we
3694 * have matched a wildcard.
3697 if (search.zonecut != NULL) {
3699 * If we're beneath a zone cut, we don't want to look for
3700 * CNAMEs because they're not legitimate zone glue.
3702 cname_ok = ISC_FALSE;
3705 * The node may be a zone cut itself. If it might be one,
3706 * make sure we check for it later.
3708 * DS records live above the zone cut in ordinary zone so
3709 * we want to ignore any referral.
3711 * Stub zones don't have anything "above" the delegation so
3712 * we always return a referral.
3714 if (node->find_callback &&
3715 ((node != search.rbtdb->origin_node &&
3716 !dns_rdatatype_atparent(type)) ||
3717 IS_STUB(search.rbtdb)))
3718 maybe_zonecut = ISC_TRUE;
3722 * Certain DNSSEC types are not subject to CNAME matching
3723 * (RFC4035, section 2.5 and RFC3007).
3725 * We don't check for RRSIG, because we don't store RRSIG records
3728 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3729 cname_ok = ISC_FALSE;
3732 * We now go looking for rdata...
3735 lock = &search.rbtdb->node_locks[node->locknum].lock;
3736 NODE_LOCK(lock, isc_rwlocktype_read);
3740 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3744 empty_node = ISC_TRUE;
3745 for (header = node->data; header != NULL; header = header_next) {
3746 header_next = header->next;
3748 * Look for an active, extant rdataset.
3751 if (header->serial <= search.serial &&
3754 * Is this a "this rdataset doesn't
3757 if (NONEXISTENT(header))
3761 header = header->down;
3762 } while (header != NULL);
3763 if (header != NULL) {
3765 * We now know that there is at least one active
3766 * rdataset at this node.
3768 empty_node = ISC_FALSE;
3771 * Do special zone cut handling, if requested.
3773 if (maybe_zonecut &&
3774 header->type == dns_rdatatype_ns) {
3776 * We increment the reference count on node to
3777 * ensure that search->zonecut_rdataset will
3778 * still be valid later.
3780 new_reference(search.rbtdb, node);
3781 search.zonecut = node;
3782 search.zonecut_rdataset = header;
3783 search.zonecut_sigrdataset = NULL;
3784 search.need_cleanup = ISC_TRUE;
3785 maybe_zonecut = ISC_FALSE;
3786 at_zonecut = ISC_TRUE;
3788 * It is not clear if KEY should still be
3789 * allowed at the parent side of the zone
3790 * cut or not. It is needed for RFC3007
3791 * validated updates.
3793 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3794 && type != dns_rdatatype_nsec
3795 && type != dns_rdatatype_key) {
3797 * Glue is not OK, but any answer we
3798 * could return would be glue. Return
3804 if (found != NULL && foundsig != NULL)
3810 * If the NSEC3 record doesn't match the chain
3811 * we are using behave as if it isn't here.
3813 if (header->type == dns_rdatatype_nsec3 &&
3814 !matchparams(header, &search)) {
3815 NODE_UNLOCK(lock, isc_rwlocktype_read);
3819 * If we found a type we were looking for,
3822 if (header->type == type ||
3823 type == dns_rdatatype_any ||
3824 (header->type == dns_rdatatype_cname &&
3827 * We've found the answer!
3830 if (header->type == dns_rdatatype_cname &&
3833 * We may be finding a CNAME instead
3834 * of the desired type.
3836 * If we've already got the CNAME RRSIG,
3837 * use it, otherwise change sigtype
3838 * so that we find it.
3840 if (cnamesig != NULL)
3841 foundsig = cnamesig;
3844 RBTDB_RDATATYPE_SIGCNAME;
3847 * If we've got all we need, end the search.
3849 if (!maybe_zonecut && foundsig != NULL)
3851 } else if (header->type == sigtype) {
3853 * We've found the RRSIG rdataset for our
3854 * target type. Remember it.
3858 * If we've got all we need, end the search.
3860 if (!maybe_zonecut && found != NULL)
3862 } else if (header->type == dns_rdatatype_nsec &&
3863 !search.rbtversion->havensec3) {
3865 * Remember a NSEC rdataset even if we're
3866 * not specifically looking for it, because
3867 * we might need it later.
3869 nsecheader = header;
3870 } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3871 !search.rbtversion->havensec3) {
3873 * If we need the NSEC rdataset, we'll also
3874 * need its signature.
3877 } else if (cname_ok &&
3878 header->type == RBTDB_RDATATYPE_SIGCNAME) {
3880 * If we get a CNAME match, we'll also need
3890 * We have an exact match for the name, but there are no
3891 * active rdatasets in the desired version. That means that
3892 * this node doesn't exist in the desired version, and that
3893 * we really have a partial match.
3896 NODE_UNLOCK(lock, isc_rwlocktype_read);
3902 * If we didn't find what we were looking for...
3904 if (found == NULL) {
3905 if (search.zonecut != NULL) {
3907 * We were trying to find glue at a node beneath a
3908 * zone cut, but didn't.
3910 * Return the delegation.
3912 NODE_UNLOCK(lock, isc_rwlocktype_read);
3913 result = setup_delegation(&search, nodep, foundname,
3914 rdataset, sigrdataset);
3918 * The desired type doesn't exist.
3920 result = DNS_R_NXRRSET;
3921 if (search.rbtversion->secure == dns_db_secure &&
3922 !search.rbtversion->havensec3 &&
3923 (nsecheader == NULL || nsecsig == NULL)) {
3925 * The zone is secure but there's no NSEC,
3926 * or the NSEC has no signature!
3929 result = DNS_R_BADDB;
3933 NODE_UNLOCK(lock, isc_rwlocktype_read);
3934 result = find_closest_nsec(&search, nodep, foundname,
3935 rdataset, sigrdataset,
3937 search.rbtversion->secure);
3938 if (result == ISC_R_SUCCESS)
3939 result = DNS_R_EMPTYWILD;
3942 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3946 * There's no NSEC record, and we were told
3949 result = DNS_R_BADDB;
3952 if (nodep != NULL) {
3953 new_reference(search.rbtdb, node);
3956 if ((search.rbtversion->secure == dns_db_secure &&
3957 !search.rbtversion->havensec3) ||
3958 (search.options & DNS_DBFIND_FORCENSEC) != 0)
3960 bind_rdataset(search.rbtdb, node, nsecheader,
3962 if (nsecsig != NULL)
3963 bind_rdataset(search.rbtdb, node,
3964 nsecsig, 0, sigrdataset);
3967 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3972 * We found what we were looking for, or we found a CNAME.
3975 if (type != found->type &&
3976 type != dns_rdatatype_any &&
3977 found->type == dns_rdatatype_cname) {
3979 * We weren't doing an ANY query and we found a CNAME instead
3980 * of the type we were looking for, so we need to indicate
3981 * that result to the caller.
3983 result = DNS_R_CNAME;
3984 } else if (search.zonecut != NULL) {
3986 * If we're beneath a zone cut, we must indicate that the
3987 * result is glue, unless we're actually at the zone cut
3988 * and the type is NSEC or KEY.
3990 if (search.zonecut == node) {
3992 * It is not clear if KEY should still be
3993 * allowed at the parent side of the zone
3994 * cut or not. It is needed for RFC3007
3995 * validated updates.
3997 if (type == dns_rdatatype_nsec ||
3998 type == dns_rdatatype_nsec3 ||
3999 type == dns_rdatatype_key)
4000 result = ISC_R_SUCCESS;
4001 else if (type == dns_rdatatype_any)
4002 result = DNS_R_ZONECUT;
4004 result = DNS_R_GLUE;
4006 result = DNS_R_GLUE;
4008 * We might have found data that isn't glue, but was occluded
4009 * by a dynamic update. If the caller cares about this, they
4010 * will have told us to validate glue.
4012 * XXX We should cache the glue validity state!
4014 if (result == DNS_R_GLUE &&
4015 (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4016 !valid_glue(&search, foundname, type, node)) {
4017 NODE_UNLOCK(lock, isc_rwlocktype_read);
4018 result = setup_delegation(&search, nodep, foundname,
4019 rdataset, sigrdataset);
4024 * An ordinary successful query!
4026 result = ISC_R_SUCCESS;
4029 if (nodep != NULL) {
4031 new_reference(search.rbtdb, node);
4033 search.need_cleanup = ISC_FALSE;
4037 if (type != dns_rdatatype_any) {
4038 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
4039 if (foundsig != NULL)
4040 bind_rdataset(search.rbtdb, node, foundsig, 0,
4045 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4048 NODE_UNLOCK(lock, isc_rwlocktype_read);
4051 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4054 * If we found a zonecut but aren't going to use it, we have to
4057 if (search.need_cleanup) {
4058 node = search.zonecut;
4059 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4061 NODE_LOCK(lock, isc_rwlocktype_read);
4062 decrement_reference(search.rbtdb, node, 0,
4063 isc_rwlocktype_read, isc_rwlocktype_none,
4065 NODE_UNLOCK(lock, isc_rwlocktype_read);
4069 closeversion(db, &version, ISC_FALSE);
4071 dns_rbtnodechain_reset(&search.chain);
/*
 * zone_findzonecut() is not expected to be called.  The visible body
 * raises FATAL_ERROR with the message "zone_findzonecut() called!" and
 * the statement after it returns ISC_R_NOTIMPLEMENTED.  None of the
 * visible statements consults an argument for a lookup; sigrdataset is
 * explicitly marked UNUSED.
 */
4077 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4078 isc_stdtime_t now, dns_dbnode_t **nodep,
4079 dns_name_t *foundname,
4080 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4089 UNUSED(sigrdataset);
4091 FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4094 return (ISC_R_NOTIMPLEMENTED);
/*
 * Node callback that cache_find() hands to dns_rbt_findnode().
 *
 * 'arg' is the rbtdb_search_t of the lookup in progress, and
 * search->zonecut must still be NULL on entry.  With the node lock held
 * (read at first), the rdataset headers at 'node' are scanned for a
 * DNAME and for the RRSIG covering it.
 *
 * A header whose TTL is not later than search->now is stale.  It is
 * unlinked and freed only when its TTL is at least RBTDB_VIRTUAL in the
 * past, the write lock is held (or the try-upgrade succeeded) and the
 * node has no references; the visible code otherwise flags it with
 * RDATASET_ATTR_STALE.
 *
 * If a DNAME was found and its trust is not pending (or the caller set
 * DNS_DBFIND_PENDINGOK), a reference is taken on the node, the node and
 * both headers are recorded in 'search' with need_cleanup set, and the
 * result is DNS_R_PARTIALMATCH.  The other result assigned here is
 * DNS_R_CONTINUE.  The node lock is released with whatever lock type is
 * held at that point.
 */
4098 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4099 rbtdb_search_t *search = arg;
4100 rdatasetheader_t *header, *header_prev, *header_next;
4101 rdatasetheader_t *dname_header, *sigdname_header;
4102 isc_result_t result;
4104 isc_rwlocktype_t locktype;
4108 REQUIRE(search->zonecut == NULL);
4111 * Keep compiler silent.
4115 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4116 locktype = isc_rwlocktype_read;
4117 NODE_LOCK(lock, locktype);
4120 * Look for a DNAME or RRSIG DNAME rdataset.
4122 dname_header = NULL;
4123 sigdname_header = NULL;
4125 for (header = node->data; header != NULL; header = header_next) {
4126 header_next = header->next;
4127 if (header->rdh_ttl <= search->now) {
4129 * This rdataset is stale. If no one else is
4130 * using the node, we can clean it up right
4131 * now, otherwise we mark it as stale, and
4132 * the node as dirty, so it will get cleaned
4135 if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4136 (locktype == isc_rwlocktype_write ||
4137 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4139 * We update the node's status only when we
4140 * can get write access; otherwise, we leave
4141 * others to this work. Periodical cleaning
4142 * will eventually take the job as the last
4144 * We won't downgrade the lock, since other
4145 * rdatasets are probably stale, too.
4147 locktype = isc_rwlocktype_write;
4149 if (dns_rbtnode_refcurrent(node) == 0) {
4153 * header->down can be non-NULL if the
4154 * refcount has just decremented to 0
4155 * but decrement_reference() has not
4156 * performed clean_cache_node(), in
4157 * which case we need to purge the
4158 * stale headers first.
4160 mctx = search->rbtdb->common.mctx;
4161 clean_stale_headers(search->rbtdb,
4164 if (header_prev != NULL)
4168 node->data = header->next;
4169 free_rdataset(search->rbtdb, mctx,
4172 header->attributes |=
4173 RDATASET_ATTR_STALE;
4175 header_prev = header;
4178 header_prev = header;
4179 } else if (header->type == dns_rdatatype_dname &&
4181 dname_header = header;
4182 header_prev = header;
4183 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4185 sigdname_header = header;
4186 header_prev = header;
4188 header_prev = header;
/* Use the DNAME only when its trust level is acceptable to the caller. */
4191 if (dname_header != NULL &&
4192 (!DNS_TRUST_PENDING(dname_header->trust) ||
4193 (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4195 * We increment the reference count on node to ensure that
4196 * search->zonecut_rdataset will still be valid later.
4198 new_reference(search->rbtdb, node);
4199 INSIST(!ISC_LINK_LINKED(node, deadlink));
4200 search->zonecut = node;
4201 search->zonecut_rdataset = dname_header;
4202 search->zonecut_sigrdataset = sigdname_header;
4203 search->need_cleanup = ISC_TRUE;
4204 result = DNS_R_PARTIALMATCH;
4206 result = DNS_R_CONTINUE;
4208 NODE_UNLOCK(lock, locktype);
/*
 * Search 'node', and then nodes taken from search->chain.levels[], for
 * an NS rdataset and its RRSIG.  Each node is examined under its own
 * node lock; the existing note in the body requires the caller to hold
 * the tree lock.
 *
 * Stale headers met during the scan are freed when the write lock could
 * be obtained and the node has no references; the visible code otherwise
 * flags them with RDATASET_ATTR_STALE.
 *
 * When an NS rdataset is found:
 *   - 'foundname' (if not NULL) is built from the node name followed by
 *     the names of the chain levels;
 *   - the result becomes DNS_R_DELEGATION;
 *   - a node reference is taken when 'nodep' is not NULL;
 *   - the NS and signature headers are bound with search->now;
 *   - headers for which need_headerupdate() is true are passed to
 *     update_header() under the write lock.
 *
 * 'result' starts out as ISC_R_NOTFOUND.
 */
4213 static inline isc_result_t
4214 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4215 dns_dbnode_t **nodep, dns_name_t *foundname,
4216 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4219 dns_rbtnode_t *level_node;
4220 rdatasetheader_t *header, *header_prev, *header_next;
4221 rdatasetheader_t *found, *foundsig;
4222 isc_result_t result = ISC_R_NOTFOUND;
4227 isc_rwlocktype_t locktype;
4230 * Caller must be holding the tree lock.
4233 rbtdb = search->rbtdb;
4234 i = search->chain.level_matches;
4237 locktype = isc_rwlocktype_read;
4238 lock = &rbtdb->node_locks[node->locknum].lock;
4239 NODE_LOCK(lock, locktype);
4242 * Look for NS and RRSIG NS rdatasets.
4247 for (header = node->data;
4249 header = header_next) {
4250 header_next = header->next;
4251 if (header->rdh_ttl <= search->now) {
4253 * This rdataset is stale. If no one else is
4254 * using the node, we can clean it up right
4255 * now, otherwise we mark it as stale, and
4256 * the node as dirty, so it will get cleaned
4259 if ((header->rdh_ttl <= search->now -
4261 (locktype == isc_rwlocktype_write ||
4262 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4264 * We update the node's status only
4265 * when we can get write access.
4267 locktype = isc_rwlocktype_write;
4269 if (dns_rbtnode_refcurrent(node)
4273 m = search->rbtdb->common.mctx;
4274 clean_stale_headers(
4277 if (header_prev != NULL)
4283 free_rdataset(rbtdb, m,
4286 header->attributes |=
4287 RDATASET_ATTR_STALE;
4289 header_prev = header;
4292 header_prev = header;
4293 } else if (EXISTS(header)) {
4295 * We've found an extant rdataset. See if
4296 * we're interested in it.
4298 if (header->type == dns_rdatatype_ns) {
4300 if (foundsig != NULL)
4302 } else if (header->type ==
4303 RBTDB_RDATATYPE_SIGNS) {
4308 header_prev = header;
4310 header_prev = header;
4313 if (found != NULL) {
4315 * If we have to set foundname, we do it before
4316 * anything else. If we were to set foundname after
4317 * we had set nodep or bound the rdataset, then we'd
4318 * have to undo that work if dns_name_concatenate()
4319 * failed. By setting foundname first, there's
4320 * nothing to undo if we have trouble.
4322 if (foundname != NULL) {
4323 dns_name_init(&name, NULL);
4324 dns_rbt_namefromnode(node, &name);
4325 result = dns_name_copy(&name, foundname, NULL);
4326 while (result == ISC_R_SUCCESS && i > 0) {
4328 level_node = search->chain.levels[i];
4329 dns_name_init(&name, NULL);
4330 dns_rbt_namefromnode(level_node,
4333 dns_name_concatenate(foundname,
4338 if (result != ISC_R_SUCCESS) {
4343 result = DNS_R_DELEGATION;
4344 if (nodep != NULL) {
4345 new_reference(search->rbtdb, node);
4348 bind_rdataset(search->rbtdb, node, found, search->now,
4350 if (foundsig != NULL)
4351 bind_rdataset(search->rbtdb, node, foundsig,
4352 search->now, sigrdataset);
4353 if (need_headerupdate(found, search->now) ||
4354 (foundsig != NULL &&
4355 need_headerupdate(foundsig, search->now))) {
/* update_header() is called with the write lock; take it if only the read lock is held. */
4356 if (locktype != isc_rwlocktype_write) {
4357 NODE_UNLOCK(lock, locktype);
4358 NODE_LOCK(lock, isc_rwlocktype_write);
4359 locktype = isc_rwlocktype_write;
4361 if (need_headerupdate(found, search->now))
4362 update_header(search->rbtdb, found,
4364 if (foundsig != NULL &&
4365 need_headerupdate(foundsig, search->now)) {
4366 update_header(search->rbtdb, foundsig,
4373 NODE_UNLOCK(lock, locktype);
/* No NS at this node: continue with a node taken from the search chain. */
4375 if (found == NULL && i > 0) {
4377 node = search->chain.levels[i];
/*
 * Walk backwards through search->chain looking for a node that holds an
 * NSEC rdataset (matchtype), also remembering its RRSIG (sigmatchtype).
 *
 * Each iteration obtains the current name and origin from the chain,
 * locks the node and scans its headers, handling stale headers on the
 * way.  The loop repeats, stepping with dns_rbtnodechain_prev(), only
 * while the node just examined was empty and the step succeeded.
 *
 * Results assigned in the visible code:
 *   DNS_R_COVERINGNSEC  an NSEC was found; the name is built with
 *                       dns_name_concatenate(), the headers are bound
 *                       and a node reference is taken.
 *   ISC_R_NOTFOUND      the node had other extant data but no NSEC.
 *   other               whatever the name or chain helpers returned.
 */
4387 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4388 isc_stdtime_t now, dns_name_t *foundname,
4389 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4391 dns_rbtnode_t *node;
4392 rdatasetheader_t *header, *header_next, *header_prev;
4393 rdatasetheader_t *found, *foundsig;
4394 isc_boolean_t empty_node;
4395 isc_result_t result;
4396 dns_fixedname_t fname, forigin;
4397 dns_name_t *name, *origin;
4398 rbtdb_rdatatype_t matchtype, sigmatchtype;
4400 isc_rwlocktype_t locktype;
4402 matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4403 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4404 dns_rdatatype_nsec);
4408 dns_fixedname_init(&fname);
4409 name = dns_fixedname_name(&fname);
4410 dns_fixedname_init(&forigin);
4411 origin = dns_fixedname_name(&forigin);
4412 result = dns_rbtnodechain_current(&search->chain, name,
4414 if (result != ISC_R_SUCCESS)
4416 locktype = isc_rwlocktype_read;
4417 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4418 NODE_LOCK(lock, locktype);
4421 empty_node = ISC_TRUE;
4423 for (header = node->data;
4425 header = header_next) {
4426 header_next = header->next;
4427 if (header->rdh_ttl <= now) {
4429 * This rdataset is stale. If no one else is
4430 * using the node, we can clean it up right
4431 * now, otherwise we mark it as stale, and the
4432 * node as dirty, so it will get cleaned up
4435 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4436 (locktype == isc_rwlocktype_write ||
4437 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4439 * We update the node's status only
4440 * when we can get write access.
4442 locktype = isc_rwlocktype_write;
4444 if (dns_rbtnode_refcurrent(node)
4448 m = search->rbtdb->common.mctx;
4449 clean_stale_headers(
4452 if (header_prev != NULL)
4456 node->data = header->next;
4457 free_rdataset(search->rbtdb, m,
4460 header->attributes |=
4461 RDATASET_ATTR_STALE;
4463 header_prev = header;
4466 header_prev = header;
/* Nonexistent headers and headers with base type 0 only advance header_prev in the visible lines. */
4469 if (NONEXISTENT(header) ||
4470 RBTDB_RDATATYPE_BASE(header->type) == 0) {
4471 header_prev = header;
4474 empty_node = ISC_FALSE;
4475 if (header->type == matchtype)
4477 else if (header->type == sigmatchtype)
4479 header_prev = header;
4481 if (found != NULL) {
4482 result = dns_name_concatenate(name, origin,
4484 if (result != ISC_R_SUCCESS)
4486 bind_rdataset(search->rbtdb, node, found,
4488 if (foundsig != NULL)
4489 bind_rdataset(search->rbtdb, node, foundsig,
4491 new_reference(search->rbtdb, node);
4493 result = DNS_R_COVERINGNSEC;
4494 } else if (!empty_node) {
4495 result = ISC_R_NOTFOUND;
4497 result = dns_rbtnodechain_prev(&search->chain, NULL,
4500 NODE_UNLOCK(lock, locktype);
4501 } while (empty_node && result == ISC_R_SUCCESS);
/*
 * Look up 'name' / 'type' in a cache database.
 *
 * Requirements visible in the body: 'db' is a valid rbtdb and 'version'
 * is NULL.
 *
 * Outline of the visible logic:
 *   1. The search state is initialised and the tree is read-locked.
 *   2. dns_rbt_findnode() descends the tree with cache_zonecut_callback()
 *      as the node callback.
 *   3. On DNS_R_PARTIALMATCH the function may try find_coveringnsec()
 *      (DNS_DBFIND_COVERINGNSEC set), setup_delegation() (the callback
 *      recorded a zone cut) or find_deepest_zonecut().
 *   4. Otherwise the headers at the node are scanned under the node lock.
 *      Stale headers are freed or flagged, and the answer, its signature,
 *      negative cache entries, NS data and CNAME signatures are noted.
 *   5. If nothing was found, or the trust level of what was found is not
 *      permitted by 'options', an NS rdataset at the node yields
 *      DNS_R_DELEGATION; otherwise the deepest zone cut is looked for.
 *   6. A usable answer gives DNS_R_NCACHENXDOMAIN, DNS_R_NCACHENXRRSET,
 *      DNS_R_CNAME or ISC_R_SUCCESS.
 *   7. Headers needing a refresh are passed to update_header() under the
 *      node write lock.
 *   8. After the locks are dropped, a zone-cut reference that was taken
 *      but not handed out is released, and the node chain is reset.
 *
 * NOTE(review): the condition guarding isc_stdtime_get(&now) is not
 * visible here; presumably it applies only when 'now' is 0 -- confirm.
 */
4506 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4507 dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4508 dns_dbnode_t **nodep, dns_name_t *foundname,
4509 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4511 dns_rbtnode_t *node = NULL;
4512 isc_result_t result;
4513 rbtdb_search_t search;
4514 isc_boolean_t cname_ok = ISC_TRUE;
4515 isc_boolean_t empty_node;
4517 isc_rwlocktype_t locktype;
4518 rdatasetheader_t *header, *header_prev, *header_next;
4519 rdatasetheader_t *found, *nsheader;
4520 rdatasetheader_t *foundsig, *nssig, *cnamesig;
4521 rdatasetheader_t *update, *updatesig;
4522 rbtdb_rdatatype_t sigtype, negtype;
4526 search.rbtdb = (dns_rbtdb_t *)db;
4528 REQUIRE(VALID_RBTDB(search.rbtdb));
4529 REQUIRE(version == NULL);
4532 isc_stdtime_get(&now);
4534 search.rbtversion = NULL;
4536 search.options = options;
4537 search.copy_name = ISC_FALSE;
4538 search.need_cleanup = ISC_FALSE;
4539 search.wild = ISC_FALSE;
4540 search.zonecut = NULL;
4541 dns_fixedname_init(&search.zonecut_name);
4542 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4547 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4550 * Search down from the root of the tree. If, while going down, we
4551 * encounter a callback node, cache_zonecut_callback() will search the
4552 * rdatasets at the zone cut for a DNAME rdataset.
4554 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4555 &search.chain, DNS_RBTFIND_EMPTYDATA,
4556 cache_zonecut_callback, &search);
4558 if (result == DNS_R_PARTIALMATCH) {
4559 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4560 result = find_coveringnsec(&search, nodep, now,
4561 foundname, rdataset,
4563 if (result == DNS_R_COVERINGNSEC)
4566 if (search.zonecut != NULL) {
4567 result = setup_delegation(&search, nodep, foundname,
4568 rdataset, sigrdataset);
4572 result = find_deepest_zonecut(&search, node, nodep,
4573 foundname, rdataset,
4577 } else if (result != ISC_R_SUCCESS)
4581 * Certain DNSSEC types are not subject to CNAME matching
4582 * (RFC4035, section 2.5 and RFC3007).
4584 * We don't check for RRSIG, because we don't store RRSIG records
4587 if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4588 cname_ok = ISC_FALSE;
4591 * We now go looking for rdata...
4594 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4595 locktype = isc_rwlocktype_read;
4596 NODE_LOCK(lock, locktype);
4600 sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4601 negtype = RBTDB_RDATATYPE_VALUE(0, type);
4605 empty_node = ISC_TRUE;
4607 for (header = node->data; header != NULL; header = header_next) {
4608 header_next = header->next;
4609 if (header->rdh_ttl <= now) {
4611 * This rdataset is stale. If no one else is using the
4612 * node, we can clean it up right now, otherwise we
4613 * mark it as stale, and the node as dirty, so it will
4614 * get cleaned up later.
4616 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4617 (locktype == isc_rwlocktype_write ||
4618 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4620 * We update the node's status only when we
4621 * can get write access.
4623 locktype = isc_rwlocktype_write;
4625 if (dns_rbtnode_refcurrent(node) == 0) {
4628 mctx = search.rbtdb->common.mctx;
4629 clean_stale_headers(search.rbtdb, mctx,
4631 if (header_prev != NULL)
4635 node->data = header->next;
4636 free_rdataset(search.rbtdb, mctx,
4639 header->attributes |=
4640 RDATASET_ATTR_STALE;
4642 header_prev = header;
4645 header_prev = header;
4646 } else if (EXISTS(header)) {
4648 * We now know that there is at least one active
4649 * non-stale rdataset at this node.
4651 empty_node = ISC_FALSE;
4654 * If we found a type we were looking for, remember
4657 if (header->type == type ||
4658 (type == dns_rdatatype_any &&
4659 RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4660 (cname_ok && header->type ==
4661 dns_rdatatype_cname)) {
4663 * We've found the answer.
4666 if (header->type == dns_rdatatype_cname &&
4670 * If we've already got the CNAME RRSIG,
4671 * use it, otherwise change sigtype
4672 * so that we find it.
4674 if (cnamesig != NULL)
4675 foundsig = cnamesig;
4678 RBTDB_RDATATYPE_SIGCNAME;
4679 foundsig = cnamesig;
4681 } else if (header->type == sigtype) {
4683 * We've found the RRSIG rdataset for our
4684 * target type. Remember it.
4687 } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4688 header->type == negtype) {
4690 * We've found a negative cache entry.
4693 } else if (header->type == dns_rdatatype_ns) {
4695 * Remember a NS rdataset even if we're
4696 * not specifically looking for it, because
4697 * we might need it later.
4700 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4702 * If we need the NS rdataset, we'll also
4703 * need its signature.
4706 } else if (cname_ok &&
4707 header->type == RBTDB_RDATATYPE_SIGCNAME) {
4709 * If we get a CNAME match, we'll also need
4714 header_prev = header;
4716 header_prev = header;
4721 * We have an exact match for the name, but there are no
4722 * extant rdatasets. That means that this node doesn't
4723 * meaningfully exist, and that we really have a partial match.
4725 NODE_UNLOCK(lock, locktype);
4730 * If we didn't find what we were looking for...
4732 if (found == NULL ||
4733 (DNS_TRUST_ADDITIONAL(found->trust) &&
4734 ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4735 (found->trust == dns_trust_glue &&
4736 ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4737 (DNS_TRUST_PENDING(found->trust) &&
4738 ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4740 * If there is an NS rdataset at this node, then this is the
4743 if (nsheader != NULL) {
4744 if (nodep != NULL) {
4745 new_reference(search.rbtdb, node);
4746 INSIST(!ISC_LINK_LINKED(node, deadlink));
4749 bind_rdataset(search.rbtdb, node, nsheader, search.now,
4751 if (need_headerupdate(nsheader, search.now))
4753 if (nssig != NULL) {
4754 bind_rdataset(search.rbtdb, node, nssig,
4755 search.now, sigrdataset);
4756 if (need_headerupdate(nssig, search.now))
4759 result = DNS_R_DELEGATION;
4764 * Go find the deepest zone cut.
4766 NODE_UNLOCK(lock, locktype);
4771 * We found what we were looking for, or we found a CNAME.
4774 if (nodep != NULL) {
4775 new_reference(search.rbtdb, node);
4776 INSIST(!ISC_LINK_LINKED(node, deadlink));
4780 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4782 * We found a negative cache entry.
4784 if (NXDOMAIN(found))
4785 result = DNS_R_NCACHENXDOMAIN;
4787 result = DNS_R_NCACHENXRRSET;
4788 } else if (type != found->type &&
4789 type != dns_rdatatype_any &&
4790 found->type == dns_rdatatype_cname) {
4792 * We weren't doing an ANY query and we found a CNAME instead
4793 * of the type we were looking for, so we need to indicate
4794 * that result to the caller.
4796 result = DNS_R_CNAME;
4799 * An ordinary successful query!
4801 result = ISC_R_SUCCESS;
4804 if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4805 result == DNS_R_NCACHENXRRSET) {
4806 bind_rdataset(search.rbtdb, node, found, search.now,
4808 if (need_headerupdate(found, search.now))
4810 if (foundsig != NULL) {
4811 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4813 if (need_headerupdate(foundsig, search.now))
4814 updatesig = foundsig;
4819 if ((update != NULL || updatesig != NULL) &&
4820 locktype != isc_rwlocktype_write) {
4821 NODE_UNLOCK(lock, locktype);
4822 NODE_LOCK(lock, isc_rwlocktype_write);
4823 locktype = isc_rwlocktype_write;
4825 if (update != NULL && need_headerupdate(update, search.now))
4826 update_header(search.rbtdb, update, search.now);
4827 if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4828 update_header(search.rbtdb, updatesig, search.now);
4830 NODE_UNLOCK(lock, locktype);
4833 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4836 * If we found a zonecut but aren't going to use it, we have to
4839 if (search.need_cleanup) {
4840 node = search.zonecut;
4841 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4843 NODE_LOCK(lock, isc_rwlocktype_read);
4844 decrement_reference(search.rbtdb, node, 0,
4845 isc_rwlocktype_read, isc_rwlocktype_none,
4847 NODE_UNLOCK(lock, isc_rwlocktype_read);
4850 dns_rbtnodechain_reset(&search.chain);
/*
 * Find NS data for 'name' in a cache database.
 *
 * The tree is read-locked and searched with dns_rbt_findnode(), using
 * DNS_RBTFIND_EMPTYDATA plus DNS_RBTFIND_NOEXACT when the caller passed
 * DNS_DBFIND_NOEXACT.  No node callback is installed.
 *
 * On DNS_R_PARTIALMATCH the work is handed to find_deepest_zonecut().
 * On an exact match the headers at the node are scanned under the node
 * lock for NS and RRSIG NS, with stale headers freed or flagged as in
 * the other cache lookups.
 *
 * When NS data is found, a node reference is taken if 'nodep' is not
 * NULL, the headers are bound with search.now, and headers for which
 * need_headerupdate() is true are passed to update_header() under the
 * write lock.
 *
 * Before returning, search.need_cleanup is asserted to be false and a
 * DNS_R_DELEGATION result is converted to ISC_R_SUCCESS.
 */
4856 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4857 isc_stdtime_t now, dns_dbnode_t **nodep,
4858 dns_name_t *foundname,
4859 dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4861 dns_rbtnode_t *node = NULL;
4863 isc_result_t result;
4864 rbtdb_search_t search;
4865 rdatasetheader_t *header, *header_prev, *header_next;
4866 rdatasetheader_t *found, *foundsig;
4867 unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4868 isc_rwlocktype_t locktype;
4870 search.rbtdb = (dns_rbtdb_t *)db;
4872 REQUIRE(VALID_RBTDB(search.rbtdb));
4875 isc_stdtime_get(&now);
4877 search.rbtversion = NULL;
4879 search.options = options;
4880 search.copy_name = ISC_FALSE;
4881 search.need_cleanup = ISC_FALSE;
4882 search.wild = ISC_FALSE;
4883 search.zonecut = NULL;
4884 dns_fixedname_init(&search.zonecut_name);
4885 dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4888 if ((options & DNS_DBFIND_NOEXACT) != 0)
4889 rbtoptions |= DNS_RBTFIND_NOEXACT;
4891 RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4894 * Search down from the root of the tree.
4896 result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4897 &search.chain, rbtoptions, NULL, &search);
4899 if (result == DNS_R_PARTIALMATCH) {
4901 result = find_deepest_zonecut(&search, node, nodep, foundname,
4902 rdataset, sigrdataset);
4904 } else if (result != ISC_R_SUCCESS)
4908 * We now go looking for an NS rdataset at the node.
4911 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4912 locktype = isc_rwlocktype_read;
4913 NODE_LOCK(lock, locktype);
4918 for (header = node->data; header != NULL; header = header_next) {
4919 header_next = header->next;
4920 if (header->rdh_ttl <= now) {
4922 * This rdataset is stale. If no one else is using the
4923 * node, we can clean it up right now, otherwise we
4924 * mark it as stale, and the node as dirty, so it will
4925 * get cleaned up later.
4927 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4928 (locktype == isc_rwlocktype_write ||
4929 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4931 * We update the node's status only when we
4932 * can get write access.
4934 locktype = isc_rwlocktype_write;
4936 if (dns_rbtnode_refcurrent(node) == 0) {
4939 mctx = search.rbtdb->common.mctx;
4940 clean_stale_headers(search.rbtdb, mctx,
4942 if (header_prev != NULL)
4946 node->data = header->next;
4947 free_rdataset(search.rbtdb, mctx,
4950 header->attributes |=
4951 RDATASET_ATTR_STALE;
4953 header_prev = header;
4956 header_prev = header;
4957 } else if (EXISTS(header)) {
4959 * If we found a type we were looking for, remember
4962 if (header->type == dns_rdatatype_ns) {
4964 * Remember a NS rdataset even if we're
4965 * not specifically looking for it, because
4966 * we might need it later.
4969 } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4971 * If we need the NS rdataset, we'll also
4972 * need its signature.
4976 header_prev = header;
4978 header_prev = header;
4981 if (found == NULL) {
4983 * No NS records here.
4985 NODE_UNLOCK(lock, locktype);
4989 if (nodep != NULL) {
4990 new_reference(search.rbtdb, node);
4991 INSIST(!ISC_LINK_LINKED(node, deadlink));
4995 bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4996 if (foundsig != NULL)
4997 bind_rdataset(search.rbtdb, node, foundsig, search.now,
5000 if (need_headerupdate(found, search.now) ||
5001 (foundsig != NULL && need_headerupdate(foundsig, search.now))) {
5002 if (locktype != isc_rwlocktype_write) {
5003 NODE_UNLOCK(lock, locktype);
5004 NODE_LOCK(lock, isc_rwlocktype_write);
5005 locktype = isc_rwlocktype_write;
5007 if (need_headerupdate(found, search.now))
5008 update_header(search.rbtdb, found, search.now);
5009 if (foundsig != NULL &&
5010 need_headerupdate(foundsig, search.now)) {
5011 update_header(search.rbtdb, foundsig, search.now);
5015 NODE_UNLOCK(lock, locktype);
5018 RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5020 INSIST(!search.need_cleanup);
5022 dns_rbtnodechain_reset(&search.chain);
/* A delegation found by this function is reported to the caller as plain success. */
5024 if (result == DNS_R_DELEGATION)
5025 result = ISC_R_SUCCESS;
/*
 * Take an additional reference on node 'source'.
 *
 * 'db' must be a valid rbtdb; 'targetp' must be non-NULL and point to a
 * NULL pointer.  The node reference count is incremented with
 * dns_rbtnode_refincrement() while the strong lock of the node's lock
 * bucket is held.
 */
5031 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5032 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5033 dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5036 REQUIRE(VALID_RBTDB(rbtdb));
5037 REQUIRE(targetp != NULL && *targetp == NULL);
5039 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5040 dns_rbtnode_refincrement(node, &refs);
5042 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Release the node reference held in '*targetp'.
 *
 * 'db' must be a valid rbtdb; 'targetp' and '*targetp' must be non-NULL.
 * decrement_reference() is called under the node read lock.  If it
 * returns true, and the lock bucket has no references left and is marked
 * 'exiting', the bucket is noted as inactive.
 *
 * Later, under the database write lock, rbtdb->active being 0 sets
 * want_free.  The database is then freed with free_rbtdb() after a
 * debug message naming its origin (or "<UNKNOWN>") has been logged.
 */
5048 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5049 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5050 dns_rbtnode_t *node;
5051 isc_boolean_t want_free = ISC_FALSE;
5052 isc_boolean_t inactive = ISC_FALSE;
5053 rbtdb_nodelock_t *nodelock;
5055 REQUIRE(VALID_RBTDB(rbtdb));
5056 REQUIRE(targetp != NULL && *targetp != NULL);
5058 node = (dns_rbtnode_t *)(*targetp);
5059 nodelock = &rbtdb->node_locks[node->locknum];
5061 NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5063 if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5064 isc_rwlocktype_none, ISC_FALSE)) {
5065 if (isc_refcount_current(&nodelock->references) == 0 &&
5066 nodelock->exiting) {
5067 inactive = ISC_TRUE;
5071 NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
/* NOTE(review): the condition guarding this section is not visible here; presumably it runs only when inactive was set above -- confirm. */
5076 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5078 if (rbtdb->active == 0)
5079 want_free = ISC_TRUE;
5080 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5082 char buf[DNS_NAME_FORMATSIZE];
5083 if (dns_name_dynamic(&rbtdb->common.origin))
5084 dns_name_format(&rbtdb->common.origin, buf,
5087 strcpy(buf, "<UNKNOWN>");
5088 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5089 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5090 "calling free_rbtdb(%s)", buf);
5091 free_rbtdb(rbtdb, ISC_TRUE, NULL);
/*
 * Mark expired rdatasets at 'node' as stale, and optionally force
 * expiry while the database is in overmem mode.
 *
 * 'db' must be a valid rbtdb.  An existing note in the body requires
 * the caller to hold a tree lock.
 *
 * In overmem mode a random value is drawn: force_expire is true only
 * when the node has no 'down' pointer and the value is a multiple of 4.
 *
 * Under the node write lock every header at the node is visited:
 *   - a header whose TTL is at least RBTDB_VIRTUAL in the past is
 *     flagged RDATASET_ATTR_STALE;
 *   - otherwise, with force_expire set, a header for which RETAIN() is
 *     false has its TTL set to 0 and is flagged stale;
 *   - otherwise only a log message may be written.
 *
 * The function returns ISC_R_SUCCESS.
 */
5097 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5098 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5099 dns_rbtnode_t *rbtnode = node;
5100 rdatasetheader_t *header;
5101 isc_boolean_t force_expire = ISC_FALSE;
5103 * These are the category and module used by the cache cleaner.
5105 isc_boolean_t log = ISC_FALSE;
5106 isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5107 isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5108 int level = ISC_LOG_DEBUG(2);
5109 char printname[DNS_NAME_FORMATSIZE];
5111 REQUIRE(VALID_RBTDB(rbtdb));
5114 * Caller must hold a tree lock.
5118 isc_stdtime_get(&now);
/* In overmem mode a random draw decides whether this node is force-expired. */
5120 if (rbtdb->overmem) {
5123 isc_random_get(&val);
5125 * XXXDCL Could stand to have a better policy, like LRU.
5127 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5130 * Note that 'log' can be true IFF rbtdb->overmem is also true.
5131 * rbtdb->overmem can currently only be true for cache
5132 * databases -- hence all of the "overmem cache" log strings.
5134 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5136 isc_log_write(dns_lctx, category, module, level,
5137 "overmem cache: %s %s",
5138 force_expire ? "FORCE" : "check",
5139 dns_rbt_formatnodename(rbtnode,
5141 sizeof(printname)));
5145 * We may not need write access, but this code path is not performance
5146 * sensitive, so it should be okay to always lock as a writer.
5148 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5149 isc_rwlocktype_write);
5151 for (header = rbtnode->data; header != NULL; header = header->next)
5152 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5154 * We don't check if refcurrent(rbtnode) == 0 and try
5155 * to free like we do in cache_find(), because
5156 * refcurrent(rbtnode) must be non-zero. This is so
5157 * because 'node' is an argument to the function.
5159 header->attributes |= RDATASET_ATTR_STALE;
5162 isc_log_write(dns_lctx, category, module,
5163 level, "overmem cache: stale %s",
5165 } else if (force_expire) {
5166 if (! RETAIN(header)) {
5167 set_ttl(rbtdb, header, 0);
5168 header->attributes |= RDATASET_ATTR_STALE;
5171 isc_log_write(dns_lctx, category, module,
5172 level, "overmem cache: "
5173 "reprieve by RETAIN() %s",
5176 } else if (rbtdb->overmem && log)
5177 isc_log_write(dns_lctx, category, module, level,
5178 "overmem cache: saved %s", printname);
5180 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5181 isc_rwlocktype_write);
5183 return (ISC_R_SUCCESS);
/*
 * Record the overmem state of the database.  The flag is stored in
 * rbtdb->overmem only when IS_CACHE(rbtdb) is true; for other databases
 * the visible code leaves it untouched.
 */
5187 overmem(dns_db_t *db, isc_boolean_t overmem) {
5188 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5190 if (IS_CACHE(rbtdb))
5191 rbtdb->overmem = overmem;
/*
 * Write a human-readable dump of 'node' to the stdio stream 'out'.
 *
 * 'db' must be a valid rbtdb.  With the node read lock held, a summary
 * line is printed (node pointer, reference count, lock number).  For
 * each header reached through ->next the type is printed, and for each
 * header reached through ->down the serial, TTL, trust and attributes
 * are printed.  A node without data is reported as "(empty)".
 */
5195 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5196 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5197 dns_rbtnode_t *rbtnode = node;
5198 isc_boolean_t first;
5200 REQUIRE(VALID_RBTDB(rbtdb));
5202 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5203 isc_rwlocktype_read);
5205 fprintf(out, "node %p, %u references, locknum = %u\n",
5206 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5208 if (rbtnode->data != NULL) {
5209 rdatasetheader_t *current, *top_next;
5211 for (current = rbtnode->data; current != NULL;
5212 current = top_next) {
5213 top_next = current->next;
5215 fprintf(out, "\ttype %u", current->type);
5221 "\tserial = %lu, ttl = %u, "
5222 "trust = %u, attributes = %u, "
5224 (unsigned long)current->serial,
5227 current->attributes,
5229 current = current->down;
5230 } while (current != NULL);
5233 fprintf(out, "(empty)\n");
5235 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5236 isc_rwlocktype_read);
/*
 * Allocate and initialise a database iterator for 'db' and store it in
 * '*iteratorp'.
 *
 * 'db' must be a valid rbtdb.  The iterator is allocated from the
 * database memory context; ISC_R_NOMEMORY is returned when that fails.
 * It attaches to 'db' and starts out paused, with no tree lock held, no
 * current node and no pending deletions.
 *
 * Bits of 'options' read here:
 *   DNS_DB_RELATIVENAMES  sets common.relative_names
 *   DNS_DB_NSEC3ONLY      sets nsec3only
 *   DNS_DB_NONSEC3        sets nonsec3
 *
 * Returns ISC_R_SUCCESS once the iterator has been stored.
 */
5240 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5242 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5243 rbtdb_dbiterator_t *rbtdbiter;
5245 REQUIRE(VALID_RBTDB(rbtdb));
5247 rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5248 if (rbtdbiter == NULL)
5249 return (ISC_R_NOMEMORY);
5251 rbtdbiter->common.methods = &dbiterator_methods;
5252 rbtdbiter->common.db = NULL;
5253 dns_db_attach(db, &rbtdbiter->common.db);
5254 rbtdbiter->common.relative_names =
5255 ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5256 rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5257 rbtdbiter->common.cleaning = ISC_FALSE;
5258 rbtdbiter->paused = ISC_TRUE;
5259 rbtdbiter->tree_locked = isc_rwlocktype_none;
5260 rbtdbiter->result = ISC_R_SUCCESS;
5261 dns_fixedname_init(&rbtdbiter->name);
5262 dns_fixedname_init(&rbtdbiter->origin);
5263 rbtdbiter->node = NULL;
5264 rbtdbiter->delete = 0;
5265 rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5266 rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5267 memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5268 dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5269 dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
/* An NSEC3-only iterator starts on the NSEC3 chain; the other assignment selects the main chain. */
5270 if (rbtdbiter->nsec3only)
5271 rbtdbiter->current = &rbtdbiter->nsec3chain;
5273 rbtdbiter->current = &rbtdbiter->chain;
5275 *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5277 return (ISC_R_SUCCESS);
/*
 * Look up the rdataset of the given type (and covers) at node as seen by
 * version.  A match is bound to rdataset with bind_rdataset(); a header
 * matching the RRSIG type that covers the requested type is tracked as
 * foundsig and bound as well.
 *
 * type must not be dns_rdatatype_any.  When version is NULL the current
 * version is obtained with currentversion() and close_version is set so
 * that it can be released with closeversion() before returning.  The
 * node lock is held in read mode for the duration of the search.
 *
 * The search compares header->serial against the serial of the version
 * (header->serial <= serial) while descending each down chain.
 *
 * Returns ISC_R_SUCCESS or ISC_R_NOTFOUND (NOTE(review): the test that
 * selects between them is not visible in this excerpt; presumably
 * ISC_R_NOTFOUND when nothing matched).
 */
5281 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5282 dns_rdatatype_t type, dns_rdatatype_t covers,
5283 isc_stdtime_t now, dns_rdataset_t *rdataset,
5284 dns_rdataset_t *sigrdataset)
5286 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5287 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5288 rdatasetheader_t *header, *header_next, *found, *foundsig;
5289 rbtdb_serial_t serial;
5290 rbtdb_version_t *rbtversion = version;
5291 isc_boolean_t close_version = ISC_FALSE;
5292 rbtdb_rdatatype_t matchtype, sigmatchtype;
5294 REQUIRE(VALID_RBTDB(rbtdb));
5295 REQUIRE(type != dns_rdatatype_any);
/* No version supplied: use the current one and remember to close it. */
5297 if (rbtversion == NULL) {
5298 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5299 close_version = ISC_TRUE;
5301 serial = rbtversion->serial;
5304 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5305 isc_rwlocktype_read);
/*
 * Encoded type values compared against header->type below: the requested
 * type with its covers value, and the RRSIG set covering the type.
 */
5309 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5311 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5315 for (header = rbtnode->data; header != NULL; header = header_next) {
5316 header_next = header->next;
5318 if (header->serial <= serial &&
5321 * Is this a "this rdataset doesn't
5324 if (NONEXISTENT(header))
5328 header = header->down;
5329 } while (header != NULL);
5330 if (header != NULL) {
5332 * We have an active, extant rdataset. If it's a
5333 * type we're looking for, remember it.
5335 if (header->type == matchtype) {
5337 if (foundsig != NULL)
5339 } else if (header->type == sigmatchtype) {
5346 if (found != NULL) {
5347 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5348 if (foundsig != NULL)
5349 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5353 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5354 isc_rwlocktype_read);
5357 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5361 return (ISC_R_NOTFOUND);
5363 return (ISC_R_SUCCESS);
/*
 * Find the rdataset of the given type (and covers) at node and bind it to
 * rdataset with bind_rdataset(); a header matching the RRSIG type that
 * covers the requested type is tracked as foundsig and bound as well.
 *
 * type must not be dns_rdatatype_any.  The version argument is not used
 * by the visible code.  The node lock starts in read mode; it is treated
 * as a write lock only after NODE_TRYUPGRADE() succeeds, which is
 * attempted when a header whose ttl is at or below now - RBTDB_VIRTUAL
 * is about to be marked RDATASET_ATTR_STALE.
 *
 * Besides the requested type, headers of type RBTDB_RDATATYPE_NCACHEANY
 * or of the negative type built from the requested type are tested as
 * candidates.
 *
 * Returns ISC_R_NOTFOUND, or DNS_R_NCACHENXDOMAIN / DNS_R_NCACHENXRRSET
 * when the header found has base type 0 (a negative cache entry).
 * NOTE(review): the final return is outside the visible lines;
 * presumably it returns result, which starts as ISC_R_SUCCESS.
 */
5367 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5368 dns_rdatatype_t type, dns_rdatatype_t covers,
5369 isc_stdtime_t now, dns_rdataset_t *rdataset,
5370 dns_rdataset_t *sigrdataset)
5372 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5373 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5374 rdatasetheader_t *header, *header_next, *found, *foundsig;
5375 rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5376 isc_result_t result;
5378 isc_rwlocktype_t locktype;
5380 REQUIRE(VALID_RBTDB(rbtdb));
5381 REQUIRE(type != dns_rdatatype_any);
5385 result = ISC_R_SUCCESS;
/*
 * NOTE(review): presumably executed only when the caller passed a zero
 * now; the guarding condition is not visible in this excerpt.
 */
5388 isc_stdtime_get(&now);
5390 lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5391 locktype = isc_rwlocktype_read;
5392 NODE_LOCK(lock, locktype);
/*
 * Encoded type values compared against header->type below: the requested
 * type, the negative entry for that type, and the covering RRSIG set.
 */
5396 matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5397 negtype = RBTDB_RDATATYPE_VALUE(0, type);
5399 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5403 for (header = rbtnode->data; header != NULL; header = header_next) {
5404 header_next = header->next;
/*
 * A header whose ttl is at or below now takes this branch and is never
 * tested as a match; the type comparisons are in the else-if arm.
 */
5405 if (header->rdh_ttl <= now) {
5406 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5407 (locktype == isc_rwlocktype_write ||
5408 NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5410 * We update the node's status only when we
5411 * can get write access.
5413 locktype = isc_rwlocktype_write;
5416 * We don't check if refcurrent(rbtnode) == 0
5417 * and try to free like we do in cache_find(),
5418 * because refcurrent(rbtnode) must be
5419 * non-zero. This is so because 'node' is an
5420 * argument to the function.
5422 header->attributes |= RDATASET_ATTR_STALE;
5425 } else if (EXISTS(header)) {
5426 if (header->type == matchtype)
5428 else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5429 header->type == negtype)
5431 else if (header->type == sigmatchtype)
5435 if (found != NULL) {
5436 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5437 if (foundsig != NULL)
5438 bind_rdataset(rbtdb, rbtnode, foundsig, now,
5442 NODE_UNLOCK(lock, locktype);
5445 return (ISC_R_NOTFOUND);
5447 if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5449 * We found a negative cache entry.
5451 if (NXDOMAIN(found))
5452 result = DNS_R_NCACHENXDOMAIN;
5454 result = DNS_R_NCACHENXRRSET;
/*
 * Create an rdataset iterator for node and return it through iteratorp.
 *
 * For a database without DNS_DBATTR_CACHE set, the version is resolved
 * when none was supplied, and isc_refcount_increment() is applied to the
 * version references (NOTE(review): which arm the increment belongs to
 * is not visible in this excerpt).  The iterator records db, node, the
 * version and now, and a node reference is taken with
 * dns_rbtnode_refincrement() between NODE_STRONGLOCK and
 * NODE_STRONGUNLOCK.
 *
 * Returns ISC_R_NOMEMORY if the iterator cannot be allocated, otherwise
 * ISC_R_SUCCESS.
 */
5461 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5462 isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5464 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5465 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5466 rbtdb_version_t *rbtversion = version;
5467 rbtdb_rdatasetiter_t *iterator;
5470 REQUIRE(VALID_RBTDB(rbtdb));
5472 iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5473 if (iterator == NULL)
5474 return (ISC_R_NOMEMORY);
5476 if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5478 if (rbtversion == NULL)
5480 (dns_dbversion_t **) (void *)(&rbtversion));
5484 isc_refcount_increment(&rbtversion->references,
/*
 * NOTE(review): presumably the cache branch, filling in now when the
 * caller passed zero; the surrounding conditions are not visible here.
 */
5490 isc_stdtime_get(&now);
5494 iterator->common.magic = DNS_RDATASETITER_MAGIC;
5495 iterator->common.methods = &rdatasetiter_methods;
5496 iterator->common.db = db;
5497 iterator->common.node = node;
5498 iterator->common.version = (dns_dbversion_t *)rbtversion;
5499 iterator->common.now = now;
/* Take a node reference for the iterator while the node lock is held. */
5501 NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5503 dns_rbtnode_refincrement(rbtnode, &refs);
5506 iterator->current = NULL;
5508 NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5510 *iteratorp = (dns_rdatasetiter_t *)iterator;
5512 return (ISC_R_SUCCESS);
/*
 * Report whether node holds both a CNAME rdataset and other data that are
 * active in the version identified by serial.
 *
 * Every top-level header reachable from node->data is examined.  Headers
 * of type dns_rdatatype_cname are checked for an active CNAME; headers
 * whose base type is none of KEY, SIG, NSEC and RRSIG are checked as
 * other data.  In both cases the down chain is searched using the test
 * header->serial <= serial, and NONEXISTENT() is consulted for the
 * header selected.
 *
 * NOTE(review): the return statements are outside the visible lines;
 * presumably ISC_TRUE is returned once both cname and other_data are set.
 */
5515 static isc_boolean_t
5516 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5517 rdatasetheader_t *header, *header_next;
5518 isc_boolean_t cname, other_data;
5519 dns_rdatatype_t rdtype;
5522 * The caller must hold the node lock.
5526 * Look for CNAME and "other data" rdatasets active in our version.
5529 other_data = ISC_FALSE;
5530 for (header = node->data; header != NULL; header = header_next) {
5531 header_next = header->next;
5532 if (header->type == dns_rdatatype_cname) {
5534 * Look for an active extant CNAME.
5537 if (header->serial <= serial &&
5540 * Is this a "this rdataset doesn't
5543 if (NONEXISTENT(header))
5547 header = header->down;
5548 } while (header != NULL);
5553 * Look for active extant "other data".
5555 * "Other data" is any rdataset whose type is not
5556 * KEY, NSEC, SIG or RRSIG.
5558 rdtype = RBTDB_RDATATYPE_BASE(header->type);
5559 if (rdtype != dns_rdatatype_key &&
5560 rdtype != dns_rdatatype_sig &&
5561 rdtype != dns_rdatatype_nsec &&
5562 rdtype != dns_rdatatype_rrsig) {
5564 * Is it active and extant?
5567 if (header->serial <= serial &&
5570 * Is this a "this rdataset
5571 * doesn't exist" record?
5573 if (NONEXISTENT(header))
5577 header = header->down;
5578 } while (header != NULL);
5580 other_data = ISC_TRUE;
5585 if (cname && other_data)
/*
 * Insert newheader into the heap rbtdb->heaps[idx].
 *
 * The INSIST checks require that the database is not a cache, that
 * newheader->heap_index is zero, and that newheader is not linked on a
 * list through its link member.  The isc_heap_insert() result is stored
 * in result (NOTE(review): the return statement is outside the visible
 * lines).
 */
5592 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5593 isc_result_t result;
5595 INSIST(!IS_CACHE(rbtdb));
5596 INSIST(newheader->heap_index == 0);
5597 INSIST(!ISC_LINK_LINKED(newheader, link));
5599 result = isc_heap_insert(rbtdb->heaps[idx], newheader);
/*
 * Link newheader into the rdataset headers of rbtnode, or decide that the
 * existing data should stand.  The caller must hold the node lock.
 *
 * rbtversion is the version being modified; the checks that apply only to
 * cache data test for rbtversion == NULL.
 *
 * options:
 *   DNS_DBADD_MERGE     requires a non-NULL rbtversion.
 *   DNS_DBADD_FORCE     the addition is given dns_trust_ultimate instead
 *                       of newheader->trust for the trust comparisons.
 *   DNS_DBADD_EXACT     adds DNS_RDATASLAB_EXACT to the flags passed to
 *                       dns_rdataslab_merge().
 *   DNS_DBADD_EXACTTTL  a ttl difference between the new and the existing
 *                       header makes the merge result DNS_R_NOTEXACT.
 *
 * When rbtversion is non-NULL and loading is false, a changed record is
 * created with add_changed(); if that fails newheader is freed and
 * ISC_R_NOMEMORY is returned.
 *
 * For a cache addition of an existing (not NONEXISTENT) header, negative
 * cache entries are reconciled first: headers are expired with
 * set_ttl(..., 0) and marked RDATASET_ATTR_STALE, or the addition is
 * rejected with DNS_R_UNCHANGED when its trust is lower than that of an
 * unexpired RBTDB_RDATATYPE_NCACHEANY header.
 *
 * newheader is released with free_rdataset() on the visible paths that
 * return DNS_R_UNCHANGED, and on the paths that keep an equal, unexpired
 * NS, A or AAAA cache header; those paths lower the ttl of the existing
 * header if the new one is smaller and move noqname and closest over when
 * the existing header has none.
 *
 * A header that is linked in is also registered per lock bucket: cache
 * headers are prepended to rbtdb->rdatasets[idx] and inserted into
 * rbtdb->heaps[idx]; headers with RESIGN set go through resign_insert().
 *
 * Visible return values: ISC_R_NOMEMORY, DNS_R_UNCHANGED,
 * DNS_R_CNAMEANDOTHER (non-NULL rbtversion and cname_and_other_data()
 * true), and ISC_R_SUCCESS, before which addedrdataset is bound with
 * bind_rdataset() when it is non-NULL.
 */
5604 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5605 rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5606 dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5608 rbtdb_changed_t *changed = NULL;
5609 rdatasetheader_t *topheader, *topheader_prev, *header;
5610 unsigned char *merged;
5611 isc_result_t result;
5612 isc_boolean_t header_nx;
5613 isc_boolean_t newheader_nx;
5614 isc_boolean_t merge;
5615 dns_rdatatype_t rdtype, covers;
5616 rbtdb_rdatatype_t negtype;
5621 * Add an rdatasetheader_t to a node.
5625 * Caller must be holding the node lock.
5628 if ((options & DNS_DBADD_MERGE) != 0) {
5629 REQUIRE(rbtversion != NULL);
5634 if ((options & DNS_DBADD_FORCE) != 0)
5635 trust = dns_trust_ultimate;
5637 trust = newheader->trust;
5639 if (rbtversion != NULL && !loading) {
5641 * We always add a changed record, even if no changes end up
5642 * being made to this node, because it's harmless and
5643 * simplifies the code.
5645 changed = add_changed(rbtdb, rbtversion, rbtnode);
5646 if (changed == NULL) {
5647 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5648 return (ISC_R_NOMEMORY);
5652 newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5653 topheader_prev = NULL;
5656 if (rbtversion == NULL && !newheader_nx) {
5657 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5660 * We're adding a negative cache entry.
5662 covers = RBTDB_RDATATYPE_EXT(newheader->type);
5663 if (covers == dns_rdatatype_any) {
5665 * We're adding an negative cache entry
5666 * which covers all types (NXDOMAIN,
5667 * NODATA(QTYPE=ANY)).
5669 * We make all other data stale so that the
5670 * only rdataset that can be found at this
5671 * node is the negative cache entry.
5673 for (topheader = rbtnode->data;
5675 topheader = topheader->next) {
5676 set_ttl(rbtdb, topheader, 0);
5677 topheader->attributes |=
5678 RDATASET_ATTR_STALE;
5683 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5686 * We're adding something that isn't a
5687 * negative cache entry. Look for an extant
5688 * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5691 for (topheader = rbtnode->data;
5693 topheader = topheader->next) {
5694 if (topheader->type ==
5695 RBTDB_RDATATYPE_NCACHEANY)
5698 if (topheader != NULL && EXISTS(topheader) &&
5699 topheader->rdh_ttl > now) {
5703 if (trust < topheader->trust) {
5705 * The NXDOMAIN/NODATA(QTYPE=ANY)
5708 free_rdataset(rbtdb,
5711 if (addedrdataset != NULL)
5712 bind_rdataset(rbtdb, rbtnode,
5715 return (DNS_R_UNCHANGED);
5718 * The new rdataset is better. Expire the
5719 * NXDOMAIN/NODATA(QTYPE=ANY).
5721 set_ttl(rbtdb, topheader, 0);
5722 topheader->attributes |= RDATASET_ATTR_STALE;
5727 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5731 for (topheader = rbtnode->data;
5733 topheader = topheader->next) {
5734 if (topheader->type == newheader->type ||
5735 topheader->type == negtype)
5737 topheader_prev = topheader;
5742 * If header isn't NULL, we've found the right type. There may be
5743 * IGNORE rdatasets between the top of the chain and the first real
5744 * data. We skip over them.
5747 while (header != NULL && IGNORE(header))
5748 header = header->down;
5749 if (header != NULL) {
5750 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5753 * Deleting an already non-existent rdataset has no effect.
5755 if (header_nx && newheader_nx) {
5756 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5757 return (DNS_R_UNCHANGED);
5761 * Trying to add an rdataset with lower trust to a cache DB
5762 * has no effect, provided that the cache data isn't stale.
5764 if (rbtversion == NULL && trust < header->trust &&
5765 (header->rdh_ttl > now || header_nx)) {
5766 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5767 if (addedrdataset != NULL)
5768 bind_rdataset(rbtdb, rbtnode, header, now,
5770 return (DNS_R_UNCHANGED);
5774 * Don't merge if a nonexistent rdataset is involved.
5776 if (merge && (header_nx || newheader_nx))
5780 * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5781 * that is the union of 'newheader' and 'header'.
5784 unsigned int flags = 0;
5785 INSIST(rbtversion->serial >= header->serial);
5787 result = ISC_R_SUCCESS;
5789 if ((options & DNS_DBADD_EXACT) != 0)
5790 flags |= DNS_RDATASLAB_EXACT;
5791 if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5792 newheader->rdh_ttl != header->rdh_ttl)
5793 result = DNS_R_NOTEXACT;
5794 else if (newheader->rdh_ttl != header->rdh_ttl)
5795 flags |= DNS_RDATASLAB_FORCE;
5796 if (result == ISC_R_SUCCESS)
5797 result = dns_rdataslab_merge(
5798 (unsigned char *)header,
5799 (unsigned char *)newheader,
5800 (unsigned int)(sizeof(*newheader)),
5802 rbtdb->common.rdclass,
5803 (dns_rdatatype_t)header->type,
5805 if (result == ISC_R_SUCCESS) {
5807 * If 'header' has the same serial number as
5808 * we do, we could clean it up now if we knew
5809 * that our caller had no references to it.
5810 * We don't know this, however, so we leave it
5811 * alone. It will get cleaned up when
5812 * clean_zone_node() runs.
5814 free_rdataset(rbtdb, rbtdb->common.mctx,
5816 newheader = (rdatasetheader_t *)merged;
5817 init_rdataset(rbtdb, newheader);
5818 if (loading && RESIGN(newheader) &&
5820 header->resign < newheader->resign)
5821 newheader->resign = header->resign;
5823 free_rdataset(rbtdb, rbtdb->common.mctx,
5829 * Don't replace existing NS, A and AAAA RRsets
5830 * in the cache if they are already exist. This
5831 * prevents named being locked to old servers.
5832 * Don't lower trust of existing record if the
5835 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5836 header->type == dns_rdatatype_ns &&
5837 !header_nx && !newheader_nx &&
5838 header->trust >= newheader->trust &&
5839 dns_rdataslab_equalx((unsigned char *)header,
5840 (unsigned char *)newheader,
5841 (unsigned int)(sizeof(*newheader)),
5842 rbtdb->common.rdclass,
5843 (dns_rdatatype_t)header->type)) {
5845 * Honour the new ttl if it is less than the
5848 if (header->rdh_ttl > newheader->rdh_ttl)
5849 set_ttl(rbtdb, header, newheader->rdh_ttl);
5850 if (header->noqname == NULL &&
5851 newheader->noqname != NULL) {
5852 header->noqname = newheader->noqname;
5853 newheader->noqname = NULL;
5855 if (header->closest == NULL &&
5856 newheader->closest != NULL) {
5857 header->closest = newheader->closest;
5858 newheader->closest = NULL;
5860 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5861 if (addedrdataset != NULL)
5862 bind_rdataset(rbtdb, rbtnode, header, now,
5864 return (ISC_R_SUCCESS);
5866 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5867 (header->type == dns_rdatatype_a ||
5868 header->type == dns_rdatatype_aaaa) &&
5869 !header_nx && !newheader_nx &&
5870 header->trust >= newheader->trust &&
5871 dns_rdataslab_equal((unsigned char *)header,
5872 (unsigned char *)newheader,
5873 (unsigned int)(sizeof(*newheader)))) {
5875 * Honour the new ttl if it is less than the
5878 if (header->rdh_ttl > newheader->rdh_ttl)
5879 set_ttl(rbtdb, header, newheader->rdh_ttl);
5880 if (header->noqname == NULL &&
5881 newheader->noqname != NULL) {
5882 header->noqname = newheader->noqname;
5883 newheader->noqname = NULL;
5885 if (header->closest == NULL &&
5886 newheader->closest != NULL) {
5887 header->closest = newheader->closest;
5888 newheader->closest = NULL;
5890 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5891 if (addedrdataset != NULL)
5892 bind_rdataset(rbtdb, rbtnode, header, now,
5894 return (ISC_R_SUCCESS);
5896 INSIST(rbtversion == NULL ||
5897 rbtversion->serial >= topheader->serial);
5898 if (topheader_prev != NULL)
5899 topheader_prev->next = newheader;
5901 rbtnode->data = newheader;
5902 newheader->next = topheader->next;
5905 * There are no other references to 'header' when
5906 * loading, so we MAY clean up 'header' now.
5907 * Since we don't generate changed records when
5908 * loading, we MUST clean up 'header' now.
5910 newheader->down = NULL;
5911 free_rdataset(rbtdb, rbtdb->common.mctx, header);
5913 newheader->down = topheader;
5914 topheader->next = newheader;
5916 if (changed != NULL)
5917 changed->dirty = ISC_TRUE;
5918 if (rbtversion == NULL) {
5919 set_ttl(rbtdb, header, 0);
5920 header->attributes |= RDATASET_ATTR_STALE;
5922 idx = newheader->node->locknum;
5923 if (IS_CACHE(rbtdb)) {
5924 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5927 * XXXMLG We don't check the return value
5928 * here. If it fails, we will not do TTL
5929 * based expiry on this node. However, we
5930 * will do it on the LRU side, so memory
5931 * will not leak... for long.
5933 isc_heap_insert(rbtdb->heaps[idx], newheader);
5934 } else if (RESIGN(newheader))
5935 resign_insert(rbtdb, idx, newheader);
5939 * No non-IGNORED rdatasets of the given type exist at
5944 * If we're trying to delete the type, don't bother.
5947 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5948 return (DNS_R_UNCHANGED);
5951 if (topheader != NULL) {
5953 * We have an list of rdatasets of the given type,
5954 * but they're all marked IGNORE. We simply insert
5955 * the new rdataset at the head of the list.
5957 * Ignored rdatasets cannot occur during loading, so
5961 INSIST(rbtversion == NULL ||
5962 rbtversion->serial >= topheader->serial);
5963 if (topheader_prev != NULL)
5964 topheader_prev->next = newheader;
5966 rbtnode->data = newheader;
5967 newheader->next = topheader->next;
5968 newheader->down = topheader;
5969 topheader->next = newheader;
5971 if (changed != NULL)
5972 changed->dirty = ISC_TRUE;
5975 * No rdatasets of the given type exist at the node.
5977 newheader->next = rbtnode->data;
5978 newheader->down = NULL;
5979 rbtnode->data = newheader;
5981 idx = newheader->node->locknum;
5982 if (IS_CACHE(rbtdb)) {
5983 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5985 isc_heap_insert(rbtdb->heaps[idx], newheader);
5986 } else if (RESIGN(newheader)) {
5987 resign_insert(rbtdb, idx, newheader);
5992 * Check if the node now contains CNAME and other data.
5994 if (rbtversion != NULL &&
5995 cname_and_other_data(rbtnode, rbtversion->serial))
5996 return (DNS_R_CNAMEANDOTHER);
5998 if (addedrdataset != NULL)
5999 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
6001 return (ISC_R_SUCCESS);
/*
 * Test whether type counts as a delegating type for this database.
 *
 * For a cache only dns_rdatatype_dname is tested.  Otherwise the test is
 * dns_rdatatype_dname, or dns_rdatatype_ns at a node other than
 * rbtdb->origin_node or in a database for which IS_STUB() holds.
 *
 * NOTE(review): the return statements are outside the visible lines;
 * presumably ISC_TRUE is returned when the test matches.
 */
6004 static inline isc_boolean_t
6005 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6006 rbtdb_rdatatype_t type)
6008 if (IS_CACHE(rbtdb)) {
6009 if (type == dns_rdatatype_dname)
6013 } else if (type == dns_rdatatype_dname ||
6014 (type == dns_rdatatype_ns &&
6015 (node != rbtdb->origin_node || IS_STUB(rbtdb))))
/*
 * Copy the noqname data attached to rdataset into newheader->noqname.
 *
 * dns_rdataset_getnoqname() must succeed (RUNTIME_CHECK).  A struct
 * noqname is allocated from the memory context of the database, the name
 * is duplicated into it with dns_name_dup(), and the neg and negsig
 * rdatasets are converted with dns_rdataslab_fromrdataset(); the base
 * pointers of the resulting regions are stored in the structure.
 *
 * Returns ISC_R_SUCCESS once newheader->noqname has been set; by then the
 * temporary neg and negsig rdatasets have been disassociated.  A failed
 * allocation sets result to ISC_R_NOMEMORY.
 */
6020 static inline isc_result_t
6021 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6022 dns_rdataset_t *rdataset)
6024 struct noqname *noqname;
6025 isc_mem_t *mctx = rbtdb->common.mctx;
6027 dns_rdataset_t neg, negsig;
6028 isc_result_t result;
6031 dns_name_init(&name, NULL);
6032 dns_rdataset_init(&neg);
6033 dns_rdataset_init(&negsig);
6035 result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6036 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6038 noqname = isc_mem_get(mctx, sizeof(*noqname));
6039 if (noqname == NULL) {
6040 result = ISC_R_NOMEMORY;
/*
 * The slab pointers start out NULL and are filled in one at a time, so
 * the structure is always in a known state while it is being built.
 */
6043 dns_name_init(&noqname->name, NULL);
6044 noqname->neg = NULL;
6045 noqname->negsig = NULL;
6046 noqname->type = neg.type;
6047 result = dns_name_dup(&name, mctx, &noqname->name);
6048 if (result != ISC_R_SUCCESS)
6050 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6051 if (result != ISC_R_SUCCESS)
6053 noqname->neg = r.base;
6054 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6055 if (result != ISC_R_SUCCESS)
6057 noqname->negsig = r.base;
6058 dns_rdataset_disassociate(&neg);
6059 dns_rdataset_disassociate(&negsig);
6060 newheader->noqname = noqname;
6061 return (ISC_R_SUCCESS);
/*
 * NOTE(review): presumably the shared failure path (its label is not
 * visible in this excerpt): the temporary rdatasets are disassociated
 * and free_noqname() is called on the structure.
 */
6064 dns_rdataset_disassociate(&neg);
6065 dns_rdataset_disassociate(&negsig);
6066 free_noqname(mctx, &noqname);
/*
 * Copy the closest data attached to rdataset into newheader->closest.
 *
 * dns_rdataset_getclosest() must succeed (RUNTIME_CHECK).  A struct
 * noqname is allocated from the memory context of the database, the name
 * is duplicated into it with dns_name_dup(), and the neg and negsig
 * rdatasets are converted with dns_rdataslab_fromrdataset(); the base
 * pointers of the resulting regions are stored in the structure.
 *
 * Returns ISC_R_SUCCESS once newheader->closest has been set; by then the
 * temporary neg and negsig rdatasets have been disassociated.  A failed
 * allocation sets result to ISC_R_NOMEMORY.
 */
6070 static inline isc_result_t
6071 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6072 dns_rdataset_t *rdataset)
6074 struct noqname *closest;
6075 isc_mem_t *mctx = rbtdb->common.mctx;
6077 dns_rdataset_t neg, negsig;
6078 isc_result_t result;
6081 dns_name_init(&name, NULL);
6082 dns_rdataset_init(&neg);
6083 dns_rdataset_init(&negsig);
6085 result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6086 RUNTIME_CHECK(result == ISC_R_SUCCESS);
6088 closest = isc_mem_get(mctx, sizeof(*closest));
6089 if (closest == NULL) {
6090 result = ISC_R_NOMEMORY;
/*
 * The slab pointers start out NULL and are filled in one at a time, so
 * the structure is always in a known state while it is being built.
 */
6093 dns_name_init(&closest->name, NULL);
6094 closest->neg = NULL;
6095 closest->negsig = NULL;
6096 closest->type = neg.type;
6097 result = dns_name_dup(&name, mctx, &closest->name);
6098 if (result != ISC_R_SUCCESS)
6100 result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6101 if (result != ISC_R_SUCCESS)
6103 closest->neg = r.base;
6104 result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6105 if (result != ISC_R_SUCCESS)
6107 closest->negsig = r.base;
6108 dns_rdataset_disassociate(&neg);
6109 dns_rdataset_disassociate(&negsig);
6110 newheader->closest = closest;
6111 return (ISC_R_SUCCESS);
/*
 * NOTE(review): presumably the shared failure path (its label is not
 * visible in this excerpt): the temporary rdatasets are disassociated
 * and free_noqname() is called on the structure.
 */
6114 dns_rdataset_disassociate(&neg);
6115 dns_rdataset_disassociate(&negsig);
6116 free_noqname(mctx, &closest);
6120 static dns_dbmethods_t zone_methods;
/*
 * Add rdataset to node: build an rdatasetheader_t slab from it and hand
 * the slab to add() under the node write lock.  The header stored is
 * bound to addedrdataset by add() when that pointer is non-NULL.
 *
 * The header is filled in from rdataset: the ttl is rdataset->ttl + now,
 * the type combines rdataset->type with its covers value, trust is
 * copied, and the RESIGN, NXDOMAIN and OPTOUT attributes are carried
 * over.  NOQNAME and CLOSEST data are copied with addnoqname() and
 * addclosest(); if either fails the header is freed.
 *
 * For a zone database (common.methods == &zone_methods) the REQUIRE
 * insists that an NSEC3 node receives only NSEC3 rdatasets or rdatasets
 * covering NSEC3, and that any other node receives neither.
 *
 * Locking: the tree write lock is taken when a delegating type or a new
 * NSEC record is being added, or when the database is a cache in the
 * overmem state; in the last case it is released again before add() runs
 * unless one of the first two reasons also applies.
 *
 * After a successful add() of a delegating type, rbtnode->find_callback
 * is set.  For a non-cache database with a NULL version, iszonesecure()
 * is called on the origin node after success.
 */
6123 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6124 isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6125 dns_rdataset_t *addedrdataset)
6127 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6128 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6129 rbtdb_version_t *rbtversion = version;
6130 isc_region_t region;
6131 rdatasetheader_t *newheader;
6132 rdatasetheader_t *header;
6133 isc_result_t result;
6134 isc_boolean_t delegating;
6135 isc_boolean_t newnsec;
6136 isc_boolean_t tree_locked = ISC_FALSE;
6138 REQUIRE(VALID_RBTDB(rbtdb));
6140 if (rbtdb->common.methods == &zone_methods)
6141 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6142 (rdataset->type == dns_rdatatype_nsec3 ||
6143 rdataset->covers == dns_rdatatype_nsec3)) ||
6144 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6145 rdataset->type != dns_rdatatype_nsec3 &&
6146 rdataset->covers != dns_rdatatype_nsec3)));
/*
 * NOTE(review): with no version, now is filled in from the clock;
 * presumably only when the caller passed zero (the inner test is not
 * visible in this excerpt).
 */
6148 if (rbtversion == NULL) {
6150 isc_stdtime_get(&now);
/*
 * Build the slab; sizeof(rdatasetheader_t) is passed so that the region
 * can be used as an rdatasetheader_t from its base address.
 */
6154 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6156 sizeof(rdatasetheader_t));
6157 if (result != ISC_R_SUCCESS)
6160 newheader = (rdatasetheader_t *)region.base;
6161 init_rdataset(rbtdb, newheader);
6162 set_ttl(rbtdb, newheader, rdataset->ttl + now);
6163 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6165 newheader->attributes = 0;
6166 newheader->noqname = NULL;
6167 newheader->closest = NULL;
6168 newheader->count = init_count++;
6169 newheader->trust = rdataset->trust;
6170 newheader->additional_auth = NULL;
6171 newheader->additional_glue = NULL;
6172 newheader->last_used = now;
6173 newheader->node = rbtnode;
6174 if (rbtversion != NULL) {
6175 newheader->serial = rbtversion->serial;
6178 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6179 newheader->attributes |= RDATASET_ATTR_RESIGN;
6180 newheader->resign = rdataset->resign;
6182 newheader->resign = 0;
6184 newheader->serial = 1;
6185 newheader->resign = 0;
6186 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6187 newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6188 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6189 newheader->attributes |= RDATASET_ATTR_OPTOUT;
6190 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6191 result = addnoqname(rbtdb, newheader, rdataset);
6192 if (result != ISC_R_SUCCESS) {
6193 free_rdataset(rbtdb, rbtdb->common.mctx,
6198 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6199 result = addclosest(rbtdb, newheader, rdataset);
6200 if (result != ISC_R_SUCCESS) {
6201 free_rdataset(rbtdb, rbtdb->common.mctx,
6209 * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6210 * just DNAME for the cache), then we need to set the callback bit
6213 if (delegating_type(rbtdb, rbtnode, rdataset->type))
6214 delegating = ISC_TRUE;
6216 delegating = ISC_FALSE;
6219 * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6221 if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6222 rdataset->type == dns_rdatatype_nsec)
6225 newnsec = ISC_FALSE;
6228 * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6229 * or the DB is a cache in an overmem state, hold an exclusive lock on
6230 * the tree. In the latter case the lock does not necessarily have to
6231 * be acquired but it will help purge stale entries more effectively.
6233 if (delegating || newnsec || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
6234 tree_locked = ISC_TRUE;
6235 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6238 if (IS_CACHE(rbtdb) && rbtdb->overmem)
6239 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6241 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6242 isc_rwlocktype_write);
6244 if (rbtdb->rrsetstats != NULL) {
6245 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6246 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6249 if (IS_CACHE(rbtdb)) {
6251 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6253 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6254 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6255 expire_header(rbtdb, header, tree_locked);
6258 * If we've been holding a write lock on the tree just for
6259 * cleaning, we can release it now. However, we still need the
6262 if (tree_locked && !delegating && !newnsec) {
6263 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6264 tree_locked = ISC_FALSE;
6268 result = ISC_R_SUCCESS;
6270 dns_fixedname_t fname;
6272 dns_rbtnode_t *nsecnode;
6274 dns_fixedname_init(&fname);
6275 name = dns_fixedname_name(&fname);
6276 dns_rbt_fullnamefromnode(rbtnode, name);
6278 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6279 if (result == ISC_R_SUCCESS) {
6280 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6281 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6282 } else if (result == ISC_R_EXISTS) {
6283 rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6284 result = ISC_R_SUCCESS;
6288 if (result == ISC_R_SUCCESS)
6289 result = add(rbtdb, rbtnode, rbtversion, newheader, options,
6290 ISC_FALSE, addedrdataset, now);
6291 if (result == ISC_R_SUCCESS && delegating)
6292 rbtnode->find_callback = 1;
6294 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6295 isc_rwlocktype_write);
6298 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6301 * Update the zone's secure status. If version is non-NULL
6302 * this is deferred until closeversion() is called.
6304 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6305 iszonesecure(db, version, rbtdb->origin_node);
/*
 * Subtract the rdata in rdataset from the rdataset of the same type at
 * node, recording the outcome as a new header in the given version.
 *
 * version is dereferenced without a NULL test (rbtversion->serial), so
 * callers have to supply one.  For a zone database
 * (common.methods == &zone_methods) the REQUIRE insists that an NSEC3
 * node is paired only with NSEC3 rdatasets or rdatasets covering NSEC3,
 * and any other node with neither.
 *
 * With the node write lock held, a changed record is created with
 * add_changed() and the top-level header of the same type is located.
 * If an existing header is found, dns_rdataslab_subtract() is applied:
 *   ISC_R_SUCCESS   the resulting slab becomes the new header;
 *   DNS_R_NXRRSET   nothing would remain, so a header carrying
 *                   RDATASET_ATTR_NONEXISTENT is created instead.
 * The new header is linked in front of topheader and the changed record
 * is marked dirty.  With DNS_DBSUB_EXACT a ttl difference yields
 * DNS_R_NOTEXACT before any subtraction is attempted.
 *
 * If there is no existing rdataset the result is DNS_R_NOTEXACT when
 * DNS_DBSUB_EXACT is set and DNS_R_UNCHANGED otherwise.
 *
 * On ISC_R_SUCCESS the new header is bound to newrdataset when that
 * pointer is non-NULL.  ISC_R_NOMEMORY is returned if add_changed()
 * fails and is the result if new_rdataset() fails.
 */
6311 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6312 dns_rdataset_t *rdataset, unsigned int options,
6313 dns_rdataset_t *newrdataset)
6315 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6316 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6317 rbtdb_version_t *rbtversion = version;
6318 rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6319 unsigned char *subresult;
6320 isc_region_t region;
6321 isc_result_t result;
6322 rbtdb_changed_t *changed;
6324 REQUIRE(VALID_RBTDB(rbtdb));
6326 if (rbtdb->common.methods == &zone_methods)
6327 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6328 (rdataset->type == dns_rdatatype_nsec3 ||
6329 rdataset->covers == dns_rdatatype_nsec3)) ||
6330 (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6331 rdataset->type != dns_rdatatype_nsec3 &&
6332 rdataset->covers != dns_rdatatype_nsec3)));
/*
 * Turn the rdata to be removed into a slab of its own so that it can be
 * handed to dns_rdataslab_subtract() alongside the existing header.
 */
6334 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6336 sizeof(rdatasetheader_t));
6337 if (result != ISC_R_SUCCESS)
6339 newheader = (rdatasetheader_t *)region.base;
6340 init_rdataset(rbtdb, newheader);
6341 set_ttl(rbtdb, newheader, rdataset->ttl);
6342 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6344 newheader->attributes = 0;
6345 newheader->serial = rbtversion->serial;
6346 newheader->trust = 0;
6347 newheader->noqname = NULL;
6348 newheader->closest = NULL;
6349 newheader->count = init_count++;
6350 newheader->additional_auth = NULL;
6351 newheader->additional_glue = NULL;
6352 newheader->last_used = 0;
6353 newheader->node = rbtnode;
6354 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6355 newheader->attributes |= RDATASET_ATTR_RESIGN;
6356 newheader->resign = rdataset->resign;
6358 newheader->resign = 0;
6360 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6361 isc_rwlocktype_write);
6363 changed = add_changed(rbtdb, rbtversion, rbtnode);
6364 if (changed == NULL) {
6365 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6366 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6367 isc_rwlocktype_write);
6368 return (ISC_R_NOMEMORY);
/*
 * Find the top-level header of the same type, keeping track of its
 * predecessor so that the new header can be linked in its place.
 */
6371 topheader_prev = NULL;
6372 for (topheader = rbtnode->data;
6374 topheader = topheader->next) {
6375 if (topheader->type == newheader->type)
6377 topheader_prev = topheader;
6380 * If header isn't NULL, we've found the right type. There may be
6381 * IGNORE rdatasets between the top of the chain and the first real
6382 * data. We skip over them.
6385 while (header != NULL && IGNORE(header))
6386 header = header->down;
6387 if (header != NULL && EXISTS(header)) {
6388 unsigned int flags = 0;
6390 result = ISC_R_SUCCESS;
6391 if ((options & DNS_DBSUB_EXACT) != 0) {
6392 flags |= DNS_RDATASLAB_EXACT;
6393 if (newheader->rdh_ttl != header->rdh_ttl)
6394 result = DNS_R_NOTEXACT;
6396 if (result == ISC_R_SUCCESS)
6397 result = dns_rdataslab_subtract(
6398 (unsigned char *)header,
6399 (unsigned char *)newheader,
6400 (unsigned int)(sizeof(*newheader)),
6402 rbtdb->common.rdclass,
6403 (dns_rdatatype_t)header->type,
6405 if (result == ISC_R_SUCCESS) {
6406 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6407 newheader = (rdatasetheader_t *)subresult;
6408 init_rdataset(rbtdb, newheader);
6410 * We have to set the serial since the rdataslab
6411 * subtraction routine copies the reserved portion of
6412 * header, not newheader.
6414 newheader->serial = rbtversion->serial;
6416 * XXXJT: dns_rdataslab_subtract() copied the pointers
6417 * to additional info. We need to clear these fields
6418 * to avoid having duplicated references.
6420 newheader->additional_auth = NULL;
6421 newheader->additional_glue = NULL;
6422 } else if (result == DNS_R_NXRRSET) {
6424 * This subtraction would remove all of the rdata;
6425 * add a nonexistent header instead.
6427 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6428 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6429 if (newheader == NULL) {
6430 result = ISC_R_NOMEMORY;
6433 set_ttl(rbtdb, newheader, 0);
6434 newheader->type = topheader->type;
6435 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6436 newheader->trust = 0;
6437 newheader->serial = rbtversion->serial;
6438 newheader->noqname = NULL;
6439 newheader->closest = NULL;
6440 newheader->count = 0;
6441 newheader->additional_auth = NULL;
6442 newheader->additional_glue = NULL;
6443 newheader->node = rbtnode;
6444 newheader->resign = 0;
6445 newheader->last_used = 0;
6447 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6452 * If we're here, we want to link newheader in front of
6455 INSIST(rbtversion->serial >= topheader->serial);
6456 if (topheader_prev != NULL)
6457 topheader_prev->next = newheader;
6459 rbtnode->data = newheader;
6460 newheader->next = topheader->next;
6461 newheader->down = topheader;
6462 topheader->next = newheader;
6464 changed->dirty = ISC_TRUE;
6467 * The rdataset doesn't exist, so we don't need to do anything
6468 * to satisfy the deletion request.
6470 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6471 if ((options & DNS_DBSUB_EXACT) != 0)
6472 result = DNS_R_NOTEXACT;
6474 result = DNS_R_UNCHANGED;
6477 if (result == ISC_R_SUCCESS && newrdataset != NULL)
6478 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6481 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6482 isc_rwlocktype_write);
6485 * Update the zone's secure status. If version is non-NULL
6486 * this is deferred until closeversion() is called.
6488 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6489 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
/*
 * Delete the rdataset identified by type and covers at the given node.
 *
 * Nothing is unlinked directly: a header carrying only
 * RDATASET_ATTR_NONEXISTENT is built for the type and handed to add()
 * with DNS_DBADD_FORCE.
 *
 * ISC_R_NOTIMPLEMENTED is returned for dns_rdatatype_any and for
 * dns_rdatatype_rrsig with covers == 0; ISC_R_NOMEMORY is returned when
 * the header cannot be allocated.  Otherwise result holds whatever
 * add() produced.
 */
6495 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6496 dns_rdatatype_t type, dns_rdatatype_t covers)
6498 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6499 dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6500 rbtdb_version_t *rbtversion = version;
6501 isc_result_t result;
6502 rdatasetheader_t *newheader;
6504 REQUIRE(VALID_RBTDB(rbtdb));
6506 if (type == dns_rdatatype_any)
6507 return (ISC_R_NOTIMPLEMENTED);
6508 if (type == dns_rdatatype_rrsig && covers == 0)
6509 return (ISC_R_NOTIMPLEMENTED);
6511 newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6512 if (newheader == NULL)
6513 return (ISC_R_NOMEMORY);
6514 set_ttl(rbtdb, newheader, 0);
6515 newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6516 newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6517 newheader->trust = 0;
6518 newheader->noqname = NULL;
6519 newheader->closest = NULL;
6520 newheader->additional_auth = NULL;
6521 newheader->additional_glue = NULL;
6522 if (rbtversion != NULL)
6523 newheader->serial = rbtversion->serial;
6525 newheader->serial = 0;
6526 newheader->count = 0;
6527 newheader->last_used = 0;
6528 newheader->node = rbtnode;
/* add() runs with the node lock held for writing. */
6530 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6531 isc_rwlocktype_write);
6533 result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6534 ISC_FALSE, NULL, 0);
6536 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6537 isc_rwlocktype_write);
6540 * Update the zone's secure status. If version is non-NULL
6541 * this is deferred until closeversion() is called.
6543 if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6544 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6550 * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
/*
 * Add name to the main tree with dns_rbt_addnode() and, unless an early
 * return is taken, mirror it in the auxiliary NSEC tree (rbtdb->nsec).
 *
 * The main-tree node is handed back through nodep.  A node that already
 * exists and is marked DNS_RBT_NSEC_HAS_NSEC is returned without touching
 * the auxiliary tree.  When the auxiliary node is created, or is found to
 * exist already, the main-tree node is marked DNS_RBT_NSEC_HAS_NSEC and
 * the main-tree result (noderesult) is returned.
 *
 * NOTE(review): hasnsec presumably selects whether the auxiliary tree is
 * touched at all; the test on it is not visible here - confirm.
 */
6553 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
6554 isc_boolean_t hasnsec)
6556 isc_result_t noderesult, nsecresult;
6557 dns_rbtnode_t *nsecnode;
6559 noderesult = dns_rbt_addnode(rbtdb->tree, name, nodep);
6561 return (noderesult);
6562 if (noderesult == ISC_R_EXISTS) {
6564 * Add a node to the auxiliary NSEC tree for an old node
6565 * just now getting an NSEC record.
6567 if ((*nodep)->nsec == DNS_RBT_NSEC_HAS_NSEC)
6568 return (noderesult);
6569 } else if (noderesult != ISC_R_SUCCESS) {
6570 return (noderesult);
6574 * Build the auxiliary tree for NSECs as we go.
6575 * This tree speeds searches for closest NSECs that would otherwise
6576 * need to examine many irrelevant nodes in large TLDs.
6578 * Add nodes to the auxiliary tree after corresponding nodes have
6579 * been added to the main tree.
6582 nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6583 if (nsecresult == ISC_R_SUCCESS) {
6584 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6585 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6586 return (noderesult);
6589 if (nsecresult == ISC_R_EXISTS) {
6591 isc_log_write(dns_lctx,
6592 DNS_LOGCATEGORY_DATABASE,
6593 DNS_LOGMODULE_CACHE,
6595 "addnode: NSEC node already exists");
6597 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6598 return (noderesult);
/*
 * NOTE(review): nsecresult is overwritten below with the deletenode
 * result and noderesult is what gets returned, so the failure code from
 * the auxiliary-tree dns_rbt_addnode() is neither logged nor returned.
 * Confirm that callers are not left using the node deleted here.
 */
6601 nsecresult = dns_rbt_deletenode(rbtdb->tree, *nodep, ISC_FALSE);
6602 if (nsecresult != ISC_R_SUCCESS)
6603 isc_log_write(dns_lctx,
6604 DNS_LOGCATEGORY_DATABASE,
6605 DNS_LOGMODULE_CACHE,
6607 "loading_addrdataset: "
6608 "dns_rbt_deletenode: %s after "
6609 "dns_rbt_addnode(NSEC): %s",
6610 isc_result_totext(nsecresult),
6611 isc_result_totext(noderesult));
6612 return (noderesult);
/*
 * Load callback: add rdataset at name to the database referenced by the
 * rbtdb_load_t passed as arg.
 *
 * Checks visible here: SOA is only accepted at the origin of a non-cache
 * database (DNS_R_NOTZONETOP), and wildcard owner names are rejected for
 * NS (DNS_R_INVALIDNS) and for NSEC3 (DNS_R_INVALIDNSEC3).
 *
 * NSEC3 rdatasets, and rdatasets covering NSEC3, are placed in
 * rbtdb->nsec3; everything else goes through loadnode().  The rdataset is
 * turned into a slab by dns_rdataslab_fromrdataset(), which is passed
 * sizeof(rdatasetheader_t); the header is taken from the start of the
 * resulting region, filled in, and given to add() with DNS_DBADD_MERGE.
 * DNS_R_UNCHANGED from add() is mapped to ISC_R_SUCCESS.
 */
6616 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6617 rbtdb_load_t *loadctx = arg;
6618 dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6619 dns_rbtnode_t *node;
6620 isc_result_t result;
6621 isc_region_t region;
6622 rdatasetheader_t *newheader;
6625 * This routine does no node locking. See comments in
6626 * 'load' below for more information on loading and
6632 * SOA records are only allowed at top of zone.
6634 if (rdataset->type == dns_rdatatype_soa &&
6635 !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6636 return (DNS_R_NOTZONETOP);
6638 if (rdataset->type != dns_rdatatype_nsec3 &&
6639 rdataset->covers != dns_rdatatype_nsec3)
6640 add_empty_wildcards(rbtdb, name);
6642 if (dns_name_iswildcard(name)) {
6644 * NS record owners cannot legally be wild cards.
6646 if (rdataset->type == dns_rdatatype_ns)
6647 return (DNS_R_INVALIDNS);
6649 * NSEC3 record owners cannot legally be wild cards.
6651 if (rdataset->type == dns_rdatatype_nsec3)
6652 return (DNS_R_INVALIDNSEC3);
6653 result = add_wildcard_magic(rbtdb, name);
6654 if (result != ISC_R_SUCCESS)
/* Pick the tree by record type; only an NSEC rdataset passes ISC_TRUE. */
6659 if (rdataset->type == dns_rdatatype_nsec3 ||
6660 rdataset->covers == dns_rdatatype_nsec3) {
6661 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6662 if (result == ISC_R_SUCCESS)
6663 node->nsec = DNS_RBT_NSEC_NSEC3;
6664 } else if (rdataset->type == dns_rdatatype_nsec) {
6665 result = loadnode(rbtdb, name, &node, ISC_TRUE);
6667 result = loadnode(rbtdb, name, &node, ISC_FALSE);
6669 if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
/* A freshly created node still needs its lock bucket (locknum) assigned. */
6671 if (result != ISC_R_EXISTS) {
6672 dns_name_t foundname;
6673 dns_name_init(&foundname, NULL);
6674 dns_rbt_namefromnode(node, &foundname);
6675 #ifdef DNS_RBT_USEHASH
6676 node->locknum = node->hashval % rbtdb->node_lock_count;
6678 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6679 rbtdb->node_lock_count;
6683 result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6685 sizeof(rdatasetheader_t));
6686 if (result != ISC_R_SUCCESS)
6688 newheader = (rdatasetheader_t *)region.base;
6689 init_rdataset(rbtdb, newheader);
6690 set_ttl(rbtdb, newheader,
6691 rdataset->ttl + loadctx->now); /* XXX overflow check */
6692 newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6694 newheader->attributes = 0;
6695 newheader->trust = rdataset->trust;
6696 newheader->serial = 1;
6697 newheader->noqname = NULL;
6698 newheader->closest = NULL;
6699 newheader->count = init_count++;
6700 newheader->additional_auth = NULL;
6701 newheader->additional_glue = NULL;
6702 newheader->last_used = 0;
6703 newheader->node = node;
6704 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6705 newheader->attributes |= RDATASET_ATTR_RESIGN;
6706 newheader->resign = rdataset->resign;
6708 newheader->resign = 0;
6710 result = add(rbtdb, node, rbtdb->current_version, newheader,
6711 DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
/* Flag the node for the find callback when a delegating type was added. */
6712 if (result == ISC_R_SUCCESS &&
6713 delegating_type(rbtdb, node, rdataset->type))
6714 node->find_callback = 1;
6715 else if (result == DNS_R_UNCHANGED)
6716 result = ISC_R_SUCCESS;
/*
 * Start a load: allocate an rbtdb_load_t for db, flag the database
 * RBTDB_ATTR_LOADING under the database write lock, and hand
 * loading_addrdataset back through addp.
 *
 * Returns ISC_R_NOMEMORY when the load context cannot be allocated and
 * ISC_R_SUCCESS otherwise.
 */
6722 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6723 rbtdb_load_t *loadctx;
6726 rbtdb = (dns_rbtdb_t *)db;
6728 REQUIRE(VALID_RBTDB(rbtdb));
6730 loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6731 if (loadctx == NULL)
6732 return (ISC_R_NOMEMORY);
6734 loadctx->rbtdb = rbtdb;
/* For a cache the current time is read into the load context. */
6735 if (IS_CACHE(rbtdb))
6736 isc_stdtime_get(&loadctx->now);
6740 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6742 REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6744 rbtdb->attributes |= RBTDB_ATTR_LOADING;
6746 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6748 *addp = loading_addrdataset;
6751 return (ISC_R_SUCCESS);
/*
 * Finish a load: swap RBTDB_ATTR_LOADING for RBTDB_ATTR_LOADED under the
 * database write lock, refresh the secure status of a non-cache database
 * through iszonesecure(), and return the load context to the mctx.
 *
 * Requires that the load context belongs to db and that the database is
 * currently loading and not yet loaded.  Returns ISC_R_SUCCESS.
 */
6755 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6756 rbtdb_load_t *loadctx;
6757 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6759 REQUIRE(VALID_RBTDB(rbtdb));
6760 REQUIRE(dbloadp != NULL);
6762 REQUIRE(loadctx->rbtdb == rbtdb);
6764 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6766 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6767 REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6769 rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6770 rbtdb->attributes |= RBTDB_ATTR_LOADED;
6772 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6775 * If there's a KEY rdataset at the zone origin containing a
6776 * zone key, we consider the zone secure.
6778 if (! IS_CACHE(rbtdb))
6779 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6783 isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6785 return (ISC_R_SUCCESS);
/*
 * Write the given version of db to filename in masterformat, using
 * dns_master_dump2() with dns_master_style_default.
 *
 * NOTE(review): the UNUSED()/ISC_R_NOTIMPLEMENTED lines below presumably
 * belong to an alternative preprocessor branch that is not visible here -
 * confirm.
 */
6789 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6790 dns_masterformat_t masterformat) {
6793 rbtdb = (dns_rbtdb_t *)db;
6795 REQUIRE(VALID_RBTDB(rbtdb));
6798 return (dns_master_dump2(rbtdb->common.mctx, db, version,
6799 &dns_master_style_default,
6800 filename, masterformat));
6804 UNUSED(masterformat);
6806 return (ISC_R_NOTIMPLEMENTED);
/*
 * Tree deletion callback (the function passed to dns_rbt_create() by the
 * database constructor): frees every rdatasetheader_t reachable through
 * ->next with free_rdataset().
 *
 * arg is the owning dns_rbtdb_t.  The lock bucket is read from
 * current->node before the walk, and that node lock is held for writing
 * while the headers are freed.
 */
6811 delete_callback(void *data, void *arg) {
6812 dns_rbtdb_t *rbtdb = arg;
6813 rdatasetheader_t *current, *next;
6814 unsigned int locknum;
6817 locknum = current->node->locknum;
6818 NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6819 while (current != NULL) {
/* Save the successor first: free_rdataset() releases current. */
6820 next = current->next;
6821 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6824 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
/*
 * Report whether the current version of db is marked dns_db_secure.
 * The flag is read with tree_lock held for reading.
 */
6827 static isc_boolean_t
6828 issecure(dns_db_t *db) {
6830 isc_boolean_t secure;
6832 rbtdb = (dns_rbtdb_t *)db;
6834 REQUIRE(VALID_RBTDB(rbtdb));
6836 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6837 secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6838 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Report whether the current version of db has any secure state other
 * than dns_db_insecure.  The flag is read with tree_lock held for
 * reading.
 */
6843 static isc_boolean_t
6844 isdnssec(dns_db_t *db) {
6846 isc_boolean_t dnssec;
6848 rbtdb = (dns_rbtdb_t *)db;
6850 REQUIRE(VALID_RBTDB(rbtdb));
6852 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6853 dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6854 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Count the nodes of the main tree (rbtdb->tree) with
 * dns_rbt_nodecount(), holding tree_lock for reading.
 */
6860 nodecount(dns_db_t *db) {
6864 rbtdb = (dns_rbtdb_t *)db;
6866 REQUIRE(VALID_RBTDB(rbtdb));
6868 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6869 count = dns_rbt_nodecount(rbtdb->tree);
6870 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Replace the task associated with db: an existing rbtdb->task is
 * detached and task is attached, all under the database write lock.
 *
 * NOTE(review): confirm whether a NULL task is tolerated; no guard on it
 * is visible here.
 */
6876 settask(dns_db_t *db, isc_task_t *task) {
6879 rbtdb = (dns_rbtdb_t *)db;
6881 REQUIRE(VALID_RBTDB(rbtdb));
6883 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6884 if (rbtdb->task != NULL)
6885 isc_task_detach(&rbtdb->task);
6887 isc_task_attach(task, &rbtdb->task);
6888 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6891 static isc_boolean_t
6892 ispersistent(dns_db_t *db) {
/*
 * Hand a new reference to the origin node back through nodep, which must
 * point at a NULL pointer on entry.
 *
 * When rbtdb->origin_node is set, new_reference() is called with the
 * strong lock of the bucket of that node held.  Otherwise the database
 * must be a cache (INSIST) and result becomes ISC_R_NOTFOUND.
 */
6898 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6899 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6900 dns_rbtnode_t *onode;
6901 isc_result_t result = ISC_R_SUCCESS;
6903 REQUIRE(VALID_RBTDB(rbtdb));
6904 REQUIRE(nodep != NULL && *nodep == NULL);
6906 /* Note that the access to origin_node doesn't require a DB lock */
6907 onode = (dns_rbtnode_t *)rbtdb->origin_node;
6908 if (onode != NULL) {
6909 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6910 new_reference(rbtdb, onode);
6911 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6913 *nodep = rbtdb->origin_node;
6915 INSIST(IS_CACHE(rbtdb));
6916 result = ISC_R_NOTFOUND;
/*
 * Copy the NSEC3 parameters recorded in version (or in the current
 * version when version is NULL) to the caller, with tree_lock held for
 * reading.
 *
 * result is ISC_R_SUCCESS when the version has havensec3 set and
 * ISC_R_NOTFOUND otherwise.  When salt and salt_length are both supplied,
 * *salt_length must be at least the stored salt length on entry
 * (REQUIRE); the stored salt is then copied out.  *salt_length and
 * *iterations are written whenever their pointers are non-NULL.
 */
6923 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6924 isc_uint8_t *flags, isc_uint16_t *iterations,
6925 unsigned char *salt, size_t *salt_length)
6928 isc_result_t result = ISC_R_NOTFOUND;
6929 rbtdb_version_t *rbtversion = version;
6931 rbtdb = (dns_rbtdb_t *)db;
6933 REQUIRE(VALID_RBTDB(rbtdb));
6935 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6937 if (rbtversion == NULL)
6938 rbtversion = rbtdb->current_version;
6940 if (rbtversion->havensec3) {
6942 *hash = rbtversion->hash;
6943 if (salt != NULL && salt_length != NULL) {
6944 REQUIRE(*salt_length >= rbtversion->salt_length);
6945 memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6947 if (salt_length != NULL)
6948 *salt_length = rbtversion->salt_length;
6949 if (iterations != NULL)
6950 *iterations = rbtversion->iterations;
6952 *flags = rbtversion->flags;
6953 result = ISC_R_SUCCESS;
6955 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
/*
 * Record resign as the re-signing time of the header behind rdataset and
 * keep the heap of the matching lock bucket in step.
 *
 * Zone databases only (REQUIRE(!IS_CACHE)).  With the node lock held for
 * writing, a header that already has a heap_index is either deleted from
 * the heap or repositioned with isc_heap_increased() /
 * isc_heap_decreased(), depending on how resign compares with the old
 * value.  A header without a heap_index and with a non-zero resign gets
 * RDATASET_ATTR_RESIGN and is inserted through resign_insert(), whose
 * result is kept in result.
 */
6961 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6962 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6963 isc_stdtime_t oldresign;
6964 isc_result_t result = ISC_R_SUCCESS;
6965 rdatasetheader_t *header;
6967 REQUIRE(VALID_RBTDB(rbtdb));
6968 REQUIRE(!IS_CACHE(rbtdb));
6969 REQUIRE(rdataset != NULL);
6971 header = rdataset->private3;
6974 NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6975 isc_rwlocktype_write);
/* Keep the old value: it decides the direction of the heap adjustment. */
6977 oldresign = header->resign;
6978 header->resign = resign;
6979 if (header->heap_index != 0) {
6980 INSIST(RESIGN(header));
6982 isc_heap_delete(rbtdb->heaps[header->node->locknum],
6983 header->heap_index);
6984 header->heap_index = 0;
6985 } else if (resign < oldresign)
6986 isc_heap_increased(rbtdb->heaps[header->node->locknum],
6987 header->heap_index);
6988 else if (resign > oldresign)
6989 isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6990 header->heap_index);
6991 } else if (resign && header->heap_index == 0) {
6992 header->attributes |= RDATASET_ATTR_RESIGN;
6993 result = resign_insert(rbtdb, header->node->locknum, header);
6995 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6996 isc_rwlocktype_write);
/*
 * Find the header with the earliest re-signing time across all bucket
 * heaps and bind it to rdataset.
 *
 * With the database lock held for reading, element 1 of each
 * rbtdb->heaps[i] is fetched under the matching node read lock.
 * Candidates are compared with isc_serial_lt() on ->resign; when a new
 * candidate wins, the node lock of the displaced one is released.  The
 * winner is bound with bind_rdataset(), its owner name is written to
 * foundname when that is non-NULL, and its node lock is released.
 *
 * result starts as ISC_R_NOTFOUND and becomes ISC_R_SUCCESS once a header
 * has been bound.
 */
7001 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
7002 dns_name_t *foundname)
7004 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7005 rdatasetheader_t *header = NULL, *this;
7007 isc_result_t result = ISC_R_NOTFOUND;
7008 unsigned int locknum;
7010 REQUIRE(VALID_RBTDB(rbtdb));
7012 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
7014 for (i = 0; i < rbtdb->node_lock_count; i++) {
7015 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
7016 this = isc_heap_element(rbtdb->heaps[i], 1);
7018 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7019 isc_rwlocktype_read);
7024 else if (isc_serial_lt(this->resign, header->resign)) {
7025 locknum = header->node->locknum;
7026 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7027 isc_rwlocktype_read);
7030 NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7031 isc_rwlocktype_read);
7037 bind_rdataset(rbtdb, header->node, header, 0, rdataset);
7039 if (foundname != NULL)
7040 dns_rbt_fullnamefromnode(header->node, foundname);
7042 NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7043 isc_rwlocktype_read);
7045 result = ISC_R_SUCCESS;
7048 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
/*
 * Note that rdataset has been re-signed in the writer version: its header
 * is removed from the heap of its lock bucket, heap_index is cleared, and
 * the header is appended to rbtversion->resigned_list.
 *
 * Requires that version is rbtdb->future_version and is a writer.  The
 * database lock and then the node lock are taken for writing, and a node
 * reference is added with new_reference() while both are held.
 */
7054 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
7056 rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
7057 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7058 dns_rbtnode_t *node;
7059 rdatasetheader_t *header;
7061 REQUIRE(VALID_RBTDB(rbtdb));
7062 REQUIRE(rdataset != NULL);
7063 REQUIRE(rbtdb->future_version == rbtversion);
7064 REQUIRE(rbtversion->writer);
7066 node = rdataset->private2;
7067 header = rdataset->private3;
7070 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7071 NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
7072 isc_rwlocktype_write);
7074 * Delete from heap and save to re-signed list so that it can
7075 * be restored if we backout of this change.
7077 new_reference(rbtdb, node);
7078 isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
7079 header->heap_index = 0;
7080 ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
7082 NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
7083 isc_rwlocktype_write);
7084 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
/*
 * Return the rrset statistics object of db (rbtdb->rrsetstats).
 * Only valid for cache databases (REQUIRE(IS_CACHE)).
 */
7087 static dns_stats_t *
7088 getrrsetstats(dns_db_t *db) {
7089 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7091 REQUIRE(VALID_RBTDB(rbtdb));
7092 REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
7094 return (rbtdb->rrsetstats);
7097 static dns_dbmethods_t zone_methods = {
7136 static dns_dbmethods_t cache_methods = {
/*
 * Database constructor: build a dns_rbtdb_t in mctx and return it through
 * dbp.
 *
 * type selects the method table and attribute bits: a cache database
 * uses cache_methods and DNS_DBATTR_CACHE, a stub database uses
 * zone_methods and DNS_DBATTR_STUB, anything else uses zone_methods.
 *
 * Set-up order visible below: database lock and tree lock; node lock
 * array; rrset statistics and per-bucket rdataset lists (cache only);
 * per-bucket heaps, ordered by ttl_sooner for a cache and by
 * resign_sooner otherwise; deadnode lists; per-bucket locks and reference
 * counters; mctx attachment; copy of the origin name; the three trees
 * (tree, nsec, nsec3); the origin node with its locknum (non-cache only);
 * the database reference count; and the initial version
 * (current_serial 1), which is also placed on open_versions.
 *
 * Failures before the mctx is attached jump to the unwinding code at the
 * end of the function; later failures call free_rbtdb() instead.
 * On success the magic numbers are set last and ISC_R_SUCCESS is
 * returned.
 */
7176 #ifdef DNS_RBTDB_VERSION64
7181 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
7182 dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
7183 void *driverarg, dns_db_t **dbp)
7186 isc_result_t result;
7189 isc_boolean_t (*sooner)(void *, void *);
7191 /* Keep the compiler happy. */
7196 rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7198 return (ISC_R_NOMEMORY);
7200 memset(rbtdb, '\0', sizeof(*rbtdb));
7201 dns_name_init(&rbtdb->common.origin, NULL);
7202 rbtdb->common.attributes = 0;
7203 if (type == dns_dbtype_cache) {
7204 rbtdb->common.methods = &cache_methods;
7205 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7206 } else if (type == dns_dbtype_stub) {
7207 rbtdb->common.methods = &zone_methods;
7208 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7210 rbtdb->common.methods = &zone_methods;
7211 rbtdb->common.rdclass = rdclass;
7212 rbtdb->common.mctx = NULL;
7214 result = RBTDB_INITLOCK(&rbtdb->lock);
7215 if (result != ISC_R_SUCCESS)
7218 result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7219 if (result != ISC_R_SUCCESS)
7223 * Initialize node_lock_count in a generic way to support future
7224 * extension which allows the user to specify this value on creation.
7225 * Note that when specified for a cache DB it must be larger than 1
7226 * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7228 if (rbtdb->node_lock_count == 0) {
7229 if (IS_CACHE(rbtdb))
7230 rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7232 rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7233 } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7234 result = ISC_R_RANGE;
7235 goto cleanup_tree_lock;
7237 INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7238 rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7239 sizeof(rbtdb_nodelock_t));
7240 if (rbtdb->node_locks == NULL) {
7241 result = ISC_R_NOMEMORY;
7242 goto cleanup_tree_lock;
7245 rbtdb->rrsetstats = NULL;
7246 if (IS_CACHE(rbtdb)) {
7247 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7248 if (result != ISC_R_SUCCESS)
7249 goto cleanup_node_locks;
7250 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7251 sizeof(rdatasetheaderlist_t));
7252 if (rbtdb->rdatasets == NULL) {
7253 result = ISC_R_NOMEMORY;
7254 goto cleanup_rrsetstats;
7256 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7257 ISC_LIST_INIT(rbtdb->rdatasets[i]);
7259 rbtdb->rdatasets = NULL;
7264 rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
7265 sizeof(isc_heap_t *));
7266 if (rbtdb->heaps == NULL) {
7267 result = ISC_R_NOMEMORY;
7268 goto cleanup_rdatasets;
7270 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7271 rbtdb->heaps[i] = NULL;
7272 sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7273 for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7274 result = isc_heap_create(mctx, sooner, set_index, 0,
7276 if (result != ISC_R_SUCCESS)
7281 * Create deadnode lists.
7283 rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7284 sizeof(rbtnodelist_t));
7285 if (rbtdb->deadnodes == NULL) {
7286 result = ISC_R_NOMEMORY;
7289 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7290 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7292 rbtdb->active = rbtdb->node_lock_count;
7294 for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7295 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7296 if (result == ISC_R_SUCCESS) {
7297 result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7298 if (result != ISC_R_SUCCESS)
7299 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7301 if (result != ISC_R_SUCCESS) {
7303 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7304 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7305 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7307 goto cleanup_deadnodes;
7309 rbtdb->node_locks[i].exiting = ISC_FALSE;
7313 * Attach to the mctx. The database will persist so long as there
7314 * are references to it, and attaching to the mctx ensures that our
7315 * mctx won't disappear out from under us.
7317 isc_mem_attach(mctx, &rbtdb->common.mctx);
7320 * Must be initialized before free_rbtdb() is called.
7322 isc_ondestroy_init(&rbtdb->common.ondest);
7325 * Make a copy of the origin name.
7327 result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7328 if (result != ISC_R_SUCCESS) {
7329 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7334 * Make the Red-Black Trees.
7336 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7337 if (result != ISC_R_SUCCESS) {
7338 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7342 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
7343 if (result != ISC_R_SUCCESS) {
7344 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7348 result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7349 if (result != ISC_R_SUCCESS) {
7350 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7355 * In order to set the node callback bit correctly in zone databases,
7356 * we need to know if the node has the origin name of the zone.
7357 * In loading_addrdataset() we could simply compare the new name
7358 * to the origin name, but this is expensive. Also, we don't know the
7359 * node name in addrdataset(), so we need another way of knowing the
7362 * We now explicitly create a node for the zone's origin, and then
7363 * we simply remember the node's address. This is safe, because
7364 * the top-of-zone node can never be deleted, nor can its address
7367 if (!IS_CACHE(rbtdb)) {
7368 rbtdb->origin_node = NULL;
7369 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7370 &rbtdb->origin_node);
7371 if (result != ISC_R_SUCCESS) {
7372 INSIST(result != ISC_R_EXISTS);
7373 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7376 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
7378 * We need to give the origin node the right locknum.
7380 dns_name_init(&name, NULL);
7381 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7382 #ifdef DNS_RBT_USEHASH
7383 rbtdb->origin_node->locknum =
7384 rbtdb->origin_node->hashval %
7385 rbtdb->node_lock_count;
7387 rbtdb->origin_node->locknum =
7388 dns_name_hash(&name, ISC_TRUE) %
7389 rbtdb->node_lock_count;
7394 * Misc. Initialization.
7396 result = isc_refcount_init(&rbtdb->references, 1);
7397 if (result != ISC_R_SUCCESS) {
7398 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7401 rbtdb->attributes = 0;
7402 rbtdb->overmem = ISC_FALSE;
7406 * Version Initialization.
7408 rbtdb->current_serial = 1;
7409 rbtdb->least_serial = 1;
7410 rbtdb->next_serial = 2;
7411 rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7412 if (rbtdb->current_version == NULL) {
7413 isc_refcount_decrement(&rbtdb->references, NULL);
7414 isc_refcount_destroy(&rbtdb->references);
7415 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7416 return (ISC_R_NOMEMORY);
7418 rbtdb->current_version->secure = dns_db_insecure;
7419 rbtdb->current_version->havensec3 = ISC_FALSE;
7420 rbtdb->current_version->flags = 0;
7421 rbtdb->current_version->iterations = 0;
7422 rbtdb->current_version->hash = 0;
7423 rbtdb->current_version->salt_length = 0;
7424 memset(rbtdb->current_version->salt, 0,
7425 sizeof(rbtdb->current_version->salt));
7426 rbtdb->future_version = NULL;
7427 ISC_LIST_INIT(rbtdb->open_versions);
7429 * Keep the current version in the open list so that list operation
7430 * won't happen in normal lookup operations.
7432 PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7434 rbtdb->common.magic = DNS_DB_MAGIC;
7435 rbtdb->common.impmagic = RBTDB_MAGIC;
7437 *dbp = (dns_db_t *)rbtdb;
7439 return (ISC_R_SUCCESS);
7442 isc_mem_put(mctx, rbtdb->deadnodes,
7443 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7446 if (rbtdb->heaps != NULL) {
7447 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7448 if (rbtdb->heaps[i] != NULL)
7449 isc_heap_destroy(&rbtdb->heaps[i]);
7450 isc_mem_put(mctx, rbtdb->heaps,
7451 rbtdb->node_lock_count * sizeof(isc_heap_t *));
7455 if (rbtdb->rdatasets != NULL)
7456 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7457 sizeof(rdatasetheaderlist_t));
7459 if (rbtdb->rrsetstats != NULL)
7460 dns_stats_detach(&rbtdb->rrsetstats);
7463 isc_mem_put(mctx, rbtdb->node_locks,
7464 rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7467 isc_rwlock_destroy(&rbtdb->tree_lock);
7470 RBTDB_DESTROYLOCK(&rbtdb->lock);
7473 isc_mem_put(mctx, rbtdb, sizeof(*rbtdb));
7479 * Slabbed Rdataset Methods
/*
 * Release the node reference held by rdataset: private1 is the database
 * and private2 the node, and the pair is passed to detachnode().
 */
7483 rdataset_disassociate(dns_rdataset_t *rdataset) {
7484 dns_db_t *db = rdataset->private1;
7485 dns_dbnode_t *node = rdataset->private2;
7487 detachnode(db, &node);
/*
 * Position the cursor of rdataset on the first rdata of its slab
 * (private3).
 *
 * The rdata count is the big-endian 16-bit value at the start of the
 * slab.  On the ISC_R_NOMORE path (presumably a zero count - the test is
 * not visible here) private5 is cleared.  Otherwise the count is stored
 * in privateuint4, the cursor in private5, and ISC_R_SUCCESS is returned.
 */
7491 rdataset_first(dns_rdataset_t *rdataset) {
7492 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7495 count = raw[0] * 256 + raw[1];
7497 rdataset->private5 = NULL;
7498 return (ISC_R_NOMORE);
7501 #if DNS_RDATASET_FIXED
7502 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7503 raw += 2 + (4 * count);
7509 * The privateuint4 field is the number of rdata beyond the
7510 * cursor position, so we decrement the total count by one
7511 * before storing it.
7513 * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7514 * first record. If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7515 * to the first entry in the offset table.
7518 rdataset->privateuint4 = count;
7519 rdataset->private5 = raw;
7521 return (ISC_R_SUCCESS);
/*
 * Advance the cursor of rdataset (private5) by one entry.
 *
 * privateuint4 is read as the remaining count and written back; the
 * function returns ISC_R_NOMORE on the exhausted path and otherwise moves
 * private5 forward and returns ISC_R_SUCCESS.  How far the cursor moves
 * depends on DNS_RDATASET_FIXED and DNS_RDATASETATTR_LOADORDER.
 */
7525 rdataset_next(dns_rdataset_t *rdataset) {
7527 unsigned int length;
7528 unsigned char *raw; /* RDATASLAB */
7530 count = rdataset->privateuint4;
7532 return (ISC_R_NOMORE);
7534 rdataset->privateuint4 = count;
7537 * Skip forward one record (length + 4) or one offset (4).
7539 raw = rdataset->private5;
7540 #if DNS_RDATASET_FIXED
7541 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7543 length = raw[0] * 256 + raw[1];
7545 #if DNS_RDATASET_FIXED
7547 rdataset->private5 = raw + 4; /* length(2) + order(2) */
7549 rdataset->private5 = raw + 2; /* length(2) */
7552 return (ISC_R_SUCCESS);
/*
 * Fill rdata from the slab entry under the cursor (private5), which must
 * not be NULL.
 *
 * With DNS_RDATASET_FIXED and DNS_RDATASETATTR_LOADORDER set, the cursor
 * holds a 32-bit big-endian value (presumably an offset into the slab at
 * private3 - confirm) instead of pointing at the record itself.  The
 * record length is a 16-bit big-endian value.  For RRSIG rdatasets a
 * DNS_RDATASLAB_OFFLINE bit in the byte at raw is reported as
 * DNS_RDATA_OFFLINE in rdata->flags.
 */
7556 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7557 unsigned char *raw = rdataset->private5; /* RDATASLAB */
7558 #if DNS_RDATASET_FIXED
7559 unsigned int offset;
7561 unsigned int length;
7563 unsigned int flags = 0;
7565 REQUIRE(raw != NULL);
7568 * Find the start of the record if not already in private5
7569 * then skip the length and order fields.
7571 #if DNS_RDATASET_FIXED
7572 if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7573 offset = (raw[0] << 24) + (raw[1] << 16) +
7574 (raw[2] << 8) + raw[3];
7575 raw = rdataset->private3;
7579 length = raw[0] * 256 + raw[1];
7580 #if DNS_RDATASET_FIXED
7585 if (rdataset->type == dns_rdatatype_rrsig) {
7586 if (*raw & DNS_RDATASLAB_OFFLINE)
7587 flags |= DNS_RDATA_OFFLINE;
7593 dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7594 rdata->flags |= flags;
/*
 * Clone support for slab rdatasets: a further node reference is taken
 * with attachnode() on behalf of target, and the iterator state of target
 * (privateuint4 and private5) is reset.
 */
7598 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7599 dns_db_t *db = source->private1;
7600 dns_dbnode_t *node = source->private2;
7601 dns_dbnode_t *cloned_node = NULL;
7603 attachnode(db, node, &cloned_node);
7607 * Reset iterator state.
7609 target->privateuint4 = 0;
7610 target->private5 = NULL;
/*
 * Compute the number of rdata in the slab (private3), stored as a
 * big-endian 16-bit value in its first two bytes.
 */
7614 rdataset_count(dns_rdataset_t *rdataset) {
7615 unsigned char *raw = rdataset->private3; /* RDATASLAB */
7618 count = raw[0] * 256 + raw[1];
/*
 * Bind nsec and nsecsig to the slabs stored in the noqname structure of
 * rdataset (private6), and make name refer to noqname->name.
 *
 * Each bound rdataset gets its own node reference through attachnode()
 * and inherits rdclass from the database and ttl and trust from
 * rdataset.  nsec takes noqname->type and the neg slab; nsecsig is an
 * RRSIG rdataset covering noqname->type and takes the negsig slab.
 * Iterator state and the private6/private7 slots of both outputs are
 * cleared.  Returns ISC_R_SUCCESS.
 */
7624 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7625 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7627 dns_db_t *db = rdataset->private1;
7628 dns_dbnode_t *node = rdataset->private2;
7629 dns_dbnode_t *cloned_node;
7630 struct noqname *noqname = rdataset->private6;
7633 attachnode(db, node, &cloned_node);
7634 nsec->methods = &rdataset_methods;
7635 nsec->rdclass = db->rdclass;
7636 nsec->type = noqname->type;
7638 nsec->ttl = rdataset->ttl;
7639 nsec->trust = rdataset->trust;
7640 nsec->private1 = rdataset->private1;
7641 nsec->private2 = rdataset->private2;
7642 nsec->private3 = noqname->neg;
7643 nsec->privateuint4 = 0;
7644 nsec->private5 = NULL;
7645 nsec->private6 = NULL;
7646 nsec->private7 = NULL;
7649 attachnode(db, node, &cloned_node);
7650 nsecsig->methods = &rdataset_methods;
7651 nsecsig->rdclass = db->rdclass;
7652 nsecsig->type = dns_rdatatype_rrsig;
7653 nsecsig->covers = noqname->type;
7654 nsecsig->ttl = rdataset->ttl;
7655 nsecsig->trust = rdataset->trust;
7656 nsecsig->private1 = rdataset->private1;
7657 nsecsig->private2 = rdataset->private2;
7658 nsecsig->private3 = noqname->negsig;
7659 nsecsig->privateuint4 = 0;
7660 nsecsig->private5 = NULL;
/*
 * These two slots must be cleared on nsecsig itself; nsec has already
 * had them cleared above, and clearing nsec again would leave whatever
 * nsecsig held in private6/private7 in place.
 */
7661 nsecsig->private6 = NULL;
7662 nsecsig->private7 = NULL;
7664 dns_name_clone(&noqname->name, name);
7666 return (ISC_R_SUCCESS);
/*
 * Bind nsec and nsecsig to the slabs stored in the closest structure of
 * rdataset (private7), and make name refer to closest->name.
 *
 * Each bound rdataset gets its own node reference through attachnode()
 * and inherits rdclass from the database and ttl and trust from
 * rdataset.  nsec takes closest->type and the neg slab; nsecsig is an
 * RRSIG rdataset covering closest->type and takes the negsig slab.
 * Iterator state and the private6/private7 slots of both outputs are
 * cleared.  Returns ISC_R_SUCCESS.
 */
7670 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7671 dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7673 dns_db_t *db = rdataset->private1;
7674 dns_dbnode_t *node = rdataset->private2;
7675 dns_dbnode_t *cloned_node;
7676 struct noqname *closest = rdataset->private7;
7679 attachnode(db, node, &cloned_node);
7680 nsec->methods = &rdataset_methods;
7681 nsec->rdclass = db->rdclass;
7682 nsec->type = closest->type;
7684 nsec->ttl = rdataset->ttl;
7685 nsec->trust = rdataset->trust;
7686 nsec->private1 = rdataset->private1;
7687 nsec->private2 = rdataset->private2;
7688 nsec->private3 = closest->neg;
7689 nsec->privateuint4 = 0;
7690 nsec->private5 = NULL;
7691 nsec->private6 = NULL;
7692 nsec->private7 = NULL;
7695 attachnode(db, node, &cloned_node);
7696 nsecsig->methods = &rdataset_methods;
7697 nsecsig->rdclass = db->rdclass;
7698 nsecsig->type = dns_rdatatype_rrsig;
7699 nsecsig->covers = closest->type;
7700 nsecsig->ttl = rdataset->ttl;
7701 nsecsig->trust = rdataset->trust;
7702 nsecsig->private1 = rdataset->private1;
7703 nsecsig->private2 = rdataset->private2;
7704 nsecsig->private3 = closest->negsig;
7705 nsecsig->privateuint4 = 0;
7706 nsecsig->private5 = NULL;
/*
 * These two slots must be cleared on nsecsig itself; nsec has already
 * had them cleared above, and clearing nsec again would leave whatever
 * nsecsig held in private6/private7 in place.
 */
7707 nsecsig->private6 = NULL;
7708 nsecsig->private7 = NULL;
7710 dns_name_clone(&closest->name, name);
7712 return (ISC_R_SUCCESS);
/*
 * Set the trust level of rdataset and of the stored header behind it
 * (derived from private3) in one assignment, with the node lock held for
 * writing.
 */
7716 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7717 dns_rbtdb_t *rbtdb = rdataset->private1;
7718 dns_rbtnode_t *rbtnode = rdataset->private2;
7719 rdatasetheader_t *header = rdataset->private3;
7722 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7723 isc_rwlocktype_write);
7724 header->trust = rdataset->trust = trust;
7725 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7726 isc_rwlocktype_write);
/*
 * Expire the stored header behind rdataset (derived from private3) by
 * calling expire_header() with ISC_FALSE, with the node lock held for
 * writing.
 */
7730 rdataset_expire(dns_rdataset_t *rdataset) {
7731 dns_rbtdb_t *rbtdb = rdataset->private1;
7732 dns_rbtnode_t *rbtnode = rdataset->private2;
7733 rdatasetheader_t *header = rdataset->private3;
7736 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7737 isc_rwlocktype_write);
7738 expire_header(rbtdb, header, ISC_FALSE);
7739 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7740 isc_rwlocktype_write);
7744 * Rdataset Iterator Methods
/*
 * Destroy an rdataset iterator: close its version when it holds one,
 * detach from its node, and return the iterator memory to the mctx of
 * its database.
 */
7748 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7749 rbtdb_rdatasetiter_t *rbtiterator;
7751 rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7753 if (rbtiterator->common.version != NULL)
7754 closeversion(rbtiterator->common.db,
7755 &rbtiterator->common.version, ISC_FALSE);
7756 detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7757 isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7758 sizeof(*rbtiterator));
/*
 * Position iterator on the first usable rdataset header of its node.
 *
 * For a cache the iterator time (common.now) is used; the version serial
 * is read from rbtversion->serial.  With the node lock held for reading,
 * each top-level header (linked through ->next) is visited and its ->down
 * chain is followed.  A header is considered when its serial is not
 * greater than serial and it is not IGNOREd; the code then tests for
 * NONEXISTENT headers and, when now is non-zero, for now > rdh_ttl.  The
 * header finally selected is stored in rbtiterator->current.
 *
 * Returns ISC_R_NOMORE or ISC_R_SUCCESS.
 */
7764 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7765 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7766 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7767 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7768 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7769 rdatasetheader_t *header, *top_next;
7770 rbtdb_serial_t serial;
7773 if (IS_CACHE(rbtdb)) {
7775 now = rbtiterator->common.now;
7777 serial = rbtversion->serial;
7781 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7782 isc_rwlocktype_read);
7784 for (header = rbtnode->data; header != NULL; header = top_next) {
7785 top_next = header->next;
7787 if (header->serial <= serial && !IGNORE(header)) {
7789 * Is this a "this rdataset doesn't exist"
7790 * record? Or is it too old in the cache?
7792 * Note: unlike everywhere else, we
7793 * check for now > header->rdh_ttl instead
7794 * of now >= header->rdh_ttl. This allows
7795 * ANY and RRSIG queries for 0 TTL
7796 * rdatasets to work.
7798 if (NONEXISTENT(header) ||
7799 (now != 0 && now > header->rdh_ttl))
7803 header = header->down;
7804 } while (header != NULL);
7809 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7810 isc_rwlocktype_read);
7812 rbtiterator->current = header;
7815 return (ISC_R_NOMORE);
7817 return (ISC_R_SUCCESS);
/*
 * Advance the rdataset iterator to the next usable rdataset of its
 * node.
 *
 * Starts from rbtiterator->current.  The current header's type is
 * recorded together with 'negtype', built with RBTDB_RDATATYPE_VALUE()
 * from the base/extended parts of that type, so that later top-level
 * entries carrying either type are not examined again (see the
 * header->type != type && header->type != negtype test).
 *
 * With the node read lock held, the walk continues from header->next.
 * For each remaining top-level entry the 'down' chain is searched for
 * a header with serial <= 'serial'; the candidate is then tested for
 * RDATASET_ATTR_NONEXISTENT or, when 'now' is non-zero, for
 * now > rdh_ttl.  The header the walk ends on is stored in
 * rbtiterator->current.
 *
 * Returns ISC_R_NOMORE or ISC_R_SUCCESS; the conditions selecting
 * between them are not visible here (presumably a NULL header yields
 * ISC_R_NOMORE -- confirm).
 */
7821 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7822 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7823 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7824 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7825 rbtdb_version_t *rbtversion = rbtiterator->common.version;
7826 rdatasetheader_t *header, *top_next;
7827 rbtdb_serial_t serial;
7829 rbtdb_rdatatype_t type, negtype;
7830 dns_rdatatype_t rdtype, covers;
7832 header = rbtiterator->current;
7834 return (ISC_R_NOMORE);
7836 if (IS_CACHE(rbtdb)) {
7838 now = rbtiterator->common.now;
7840 serial = rbtversion->serial;
7844 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7845 isc_rwlocktype_read);
7847 type = header->type;
7848 rdtype = RBTDB_RDATATYPE_BASE(header->type);
7850 covers = RBTDB_RDATATYPE_EXT(header->type);
7851 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7853 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7854 for (header = header->next; header != NULL; header = top_next) {
7855 top_next = header->next;
7857 * If not walking back up the down list.
7859 if (header->type != type && header->type != negtype) {
7861 if (header->serial <= serial &&
7864 * Is this a "this rdataset doesn't
7867 * Note: unlike everywhere else, we
7868 * check for now > header->ttl instead
7869 * of now >= header->ttl. This allows
7870 * ANY and RRSIG queries for 0 TTL
7871 * rdatasets to work.
7873 if ((header->attributes &
7874 RDATASET_ATTR_NONEXISTENT) != 0 ||
7875 (now != 0 && now > header->rdh_ttl))
7879 header = header->down;
7880 } while (header != NULL);
7886 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7887 isc_rwlocktype_read);
7889 rbtiterator->current = header;
7892 return (ISC_R_NOMORE);
7894 return (ISC_R_SUCCESS);
/*
 * Bind 'rdataset' to the header the iterator is currently positioned
 * on.
 *
 * Requires that the iterator has a current header (non-NULL).  The
 * binding is done by bind_rdataset(), using the iterator's 'now',
 * while the node's lock is held in read mode.
 */
7898 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7899 rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7900 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7901 dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7902 rdatasetheader_t *header;
7904 header = rbtiterator->current;
7905 REQUIRE(header != NULL);
7907 NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7908 isc_rwlocktype_read);
7910 bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7913 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7914 isc_rwlocktype_read);
7919 * Database Iterator Methods
/*
 * Take a reference on the database iterator's current node.
 *
 * The tree lock must be held by the iterator in some mode (INSISTed);
 * the node is passed to reactivate_node() together with the mode in
 * which the tree lock is currently held.
 */
7923 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7924 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7925 dns_rbtnode_t *node = rbtdbiter->node;
7930 INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7931 reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
/*
 * Drop the database iterator's reference on its current node and
 * forget the node.
 *
 * decrement_reference() is called with the node lock held in read
 * mode and is told the mode in which the iterator holds the tree
 * lock.  Afterwards rbtdbiter->node is cleared.
 */
7935 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7936 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7937 dns_rbtnode_t *node = rbtdbiter->node;
7943 lock = &rbtdb->node_locks[node->locknum].lock;
7944 NODE_LOCK(lock, isc_rwlocktype_read);
7945 decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7946 rbtdbiter->tree_locked, ISC_FALSE);
7947 NODE_UNLOCK(lock, isc_rwlocktype_read);
/* The iterator no longer points at any node. */
7949 rbtdbiter->node = NULL;
/*
 * Release the references held on every node queued in the iterator's
 * deletions[] array.
 *
 * Nothing is done when the queue is empty (rbtdbiter->delete == 0).
 * Otherwise a debug message is logged and the tree lock is obtained in
 * write mode: a read lock held by the iterator is released first and
 * remembered in 'was_read_locked'.  Each queued node is then passed to
 * decrement_reference() under its node lock (read mode), the queue is
 * emptied, and the tree write lock is released.  If the iterator held
 * the read lock on entry it is re-acquired; the iterator's
 * 'tree_locked' field is updated to match at every step.
 */
7953 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7954 dns_rbtnode_t *node;
7955 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7956 isc_boolean_t was_read_locked = ISC_FALSE;
7960 if (rbtdbiter->delete != 0) {
7962 * Note that "%d node of %d in tree" can report things like
7963 * "flush_deletions: 59 nodes of 41 in tree". This means
7964 * That some nodes appear on the deletions list more than
7965 * once. Only the last occurence will actually be deleted.
7967 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7968 DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7969 "flush_deletions: %d nodes of %d in tree",
7971 dns_rbt_nodecount(rbtdb->tree));
7973 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7974 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7975 was_read_locked = ISC_TRUE;
7977 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7978 rbtdbiter->tree_locked = isc_rwlocktype_write;
7980 for (i = 0; i < rbtdbiter->delete; i++) {
7981 node = rbtdbiter->deletions[i];
7982 lock = &rbtdb->node_locks[node->locknum].lock;
7984 NODE_LOCK(lock, isc_rwlocktype_read);
7985 decrement_reference(rbtdb, node, 0,
7986 isc_rwlocktype_read,
7987 rbtdbiter->tree_locked, ISC_FALSE);
7988 NODE_UNLOCK(lock, isc_rwlocktype_read);
7991 rbtdbiter->delete = 0;
7993 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7994 if (was_read_locked) {
7995 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7996 rbtdbiter->tree_locked = isc_rwlocktype_read;
7999 rbtdbiter->tree_locked = isc_rwlocktype_none;
/*
 * Resume a paused database iteration.
 *
 * Requires that the iterator is paused and does not hold the tree
 * lock.  The tree lock is taken in read mode, recorded in
 * 'tree_locked', and the paused flag is cleared.
 */
8005 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
8006 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8008 REQUIRE(rbtdbiter->paused);
8009 REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
8011 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8012 rbtdbiter->tree_locked = isc_rwlocktype_read;
8014 rbtdbiter->paused = ISC_FALSE;
/*
 * Destroy the database iterator referenced by '*iteratorp'.
 *
 * A tree read lock held by the iterator is released and the iterator
 * is expected to hold no tree lock afterwards (INSISTed).  The
 * reference on the current node is dropped, queued deletions are
 * flushed, the iterator's database reference is detached, both node
 * chains are reset and the iterator structure is returned to the
 * database's memory context.
 */
8018 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
8019 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
8020 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8021 dns_db_t *db = NULL;
8023 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8024 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8025 rbtdbiter->tree_locked = isc_rwlocktype_none;
8027 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
8029 dereference_iter_node(rbtdbiter);
8031 flush_deletions(rbtdbiter);
/*
 * Hold a separate reference in 'db' so that db->mctx can still be
 * used below, after the iterator's own reference has been detached.
 */
8033 dns_db_attach(rbtdbiter->common.db, &db);
8034 dns_db_detach(&rbtdbiter->common.db);
8036 dns_rbtnodechain_reset(&rbtdbiter->chain);
8037 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8038 isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
/*
 * Position the database iterator on the first node.
 *
 * If an earlier operation left a stored result other than
 * ISC_R_SUCCESS or ISC_R_NOMORE, that result is returned immediately.
 * Otherwise a paused iteration is resumed, the reference on the
 * current node is dropped and both node chains are reset.
 *
 * When 'nsec3only' is set the nsec3 tree is used.  Otherwise the main
 * tree is tried first and, if it yields ISC_R_NOTFOUND and 'nonsec3'
 * is not set, the nsec3 chain is tried instead.
 *
 * On ISC_R_SUCCESS or DNS_R_NEWORIGIN the chain's current node is
 * fetched into rbtdbiter->node; if that succeeds 'new_origin' is set
 * and a reference is taken on the node.  Any other result is expected
 * to be ISC_R_NOTFOUND (INSISTed) and is mapped to ISC_R_NOMORE.  The
 * final result is stored in rbtdbiter->result.
 */
8045 dbiterator_first(dns_dbiterator_t *iterator) {
8046 isc_result_t result;
8047 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8048 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8049 dns_name_t *name, *origin;
8051 if (rbtdbiter->result != ISC_R_SUCCESS &&
8052 rbtdbiter->result != ISC_R_NOMORE)
8053 return (rbtdbiter->result);
8055 if (rbtdbiter->paused)
8056 resume_iteration(rbtdbiter);
8058 dereference_iter_node(rbtdbiter);
8060 name = dns_fixedname_name(&rbtdbiter->name);
8061 origin = dns_fixedname_name(&rbtdbiter->origin);
8062 dns_rbtnodechain_reset(&rbtdbiter->chain);
8063 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8065 if (rbtdbiter->nsec3only) {
8066 rbtdbiter->current = &rbtdbiter->nsec3chain;
8067 result = dns_rbtnodechain_first(rbtdbiter->current,
8068 rbtdb->nsec3, name, origin);
8070 rbtdbiter->current = &rbtdbiter->chain;
8071 result = dns_rbtnodechain_first(rbtdbiter->current,
8072 rbtdb->tree, name, origin);
8073 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
8074 rbtdbiter->current = &rbtdbiter->nsec3chain;
8075 result = dns_rbtnodechain_first(rbtdbiter->current,
8080 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8081 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8082 NULL, &rbtdbiter->node);
8083 if (result == ISC_R_SUCCESS) {
8084 rbtdbiter->new_origin = ISC_TRUE;
8085 reference_iter_node(rbtdbiter);
8088 INSIST(result == ISC_R_NOTFOUND);
8089 result = ISC_R_NOMORE; /* The tree is empty. */
8092 rbtdbiter->result = result;
/*
 * Position the database iterator on the last node.
 *
 * If an earlier operation left a stored result other than
 * ISC_R_SUCCESS or ISC_R_NOMORE, that result is returned immediately.
 * Otherwise a paused iteration is resumed, the reference on the
 * current node is dropped and both node chains are reset.
 *
 * The nsec3 tree is consulted only when 'nsec3only' is set and
 * 'nonsec3' is clear; the main tree is consulted when 'nsec3only' is
 * clear and no node has been found so far.
 *
 * NOTE(review): with neither flag set the nsec3 tree is not tried
 * here, whereas dbiterator_next() continues from the main chain into
 * the nsec3 chain -- confirm that this asymmetry is intended.
 *
 * On ISC_R_SUCCESS or DNS_R_NEWORIGIN the chain's current node is
 * fetched into rbtdbiter->node; if that succeeds 'new_origin' is set
 * and a reference is taken on the node.  Any other result is expected
 * to be ISC_R_NOTFOUND (INSISTed) and is mapped to ISC_R_NOMORE.  The
 * final result is stored in rbtdbiter->result.
 */
8098 dbiterator_last(dns_dbiterator_t *iterator) {
8099 isc_result_t result;
8100 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8101 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8102 dns_name_t *name, *origin;
8104 if (rbtdbiter->result != ISC_R_SUCCESS &&
8105 rbtdbiter->result != ISC_R_NOMORE)
8106 return (rbtdbiter->result);
8108 if (rbtdbiter->paused)
8109 resume_iteration(rbtdbiter);
8111 dereference_iter_node(rbtdbiter);
8113 name = dns_fixedname_name(&rbtdbiter->name);
8114 origin = dns_fixedname_name(&rbtdbiter->origin);
8115 dns_rbtnodechain_reset(&rbtdbiter->chain);
8116 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8118 result = ISC_R_NOTFOUND;
8119 if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
8120 rbtdbiter->current = &rbtdbiter->nsec3chain;
8121 result = dns_rbtnodechain_last(rbtdbiter->current,
8122 rbtdb->nsec3, name, origin);
8124 if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
8125 rbtdbiter->current = &rbtdbiter->chain;
8126 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8129 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8130 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8131 NULL, &rbtdbiter->node);
8132 if (result == ISC_R_SUCCESS) {
8133 rbtdbiter->new_origin = ISC_TRUE;
8134 reference_iter_node(rbtdbiter);
8137 INSIST(result == ISC_R_NOTFOUND);
8138 result = ISC_R_NOMORE; /* The tree is empty. */
8141 rbtdbiter->result = result;
/*
 * Position the database iterator on the node named 'name'.
 *
 * If an earlier operation left a stored result other than
 * ISC_R_SUCCESS, ISC_R_NOTFOUND or ISC_R_NOMORE, that result is
 * returned immediately.  Otherwise a paused iteration is resumed, the
 * reference on the current node is dropped and both node chains are
 * reset.
 *
 * The lookup uses dns_rbt_findnode() with DNS_RBTFIND_EMPTYDATA:
 *   - 'nsec3only' set: the nsec3 tree only;
 *   - 'nonsec3' set:   the main tree only;
 *   - otherwise:       the main tree first and, on DNS_R_PARTIALMATCH,
 *     the nsec3 tree; the iterator switches to the nsec3 chain only if
 *     that second lookup succeeds.
 *
 * Two post-lookup sequences follow.  NOTE(review): they look like
 * alternative implementations, presumably selected by preprocessor
 * conditionals that are not visible here -- confirm which one is
 * compiled.  In the first, ISC_R_SUCCESS leads to fetching the current
 * name/origin from the chain and referencing the node, while
 * DNS_R_PARTIALMATCH is turned into ISC_R_NOTFOUND with the node
 * cleared.  In the second, DNS_R_PARTIALMATCH is treated like success
 * and stored as ISC_R_SUCCESS.
 */
8147 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
8148 isc_result_t result;
8149 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8150 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8151 dns_name_t *iname, *origin;
8153 if (rbtdbiter->result != ISC_R_SUCCESS &&
8154 rbtdbiter->result != ISC_R_NOTFOUND &&
8155 rbtdbiter->result != ISC_R_NOMORE)
8156 return (rbtdbiter->result);
8158 if (rbtdbiter->paused)
8159 resume_iteration(rbtdbiter);
8161 dereference_iter_node(rbtdbiter);
8163 iname = dns_fixedname_name(&rbtdbiter->name);
8164 origin = dns_fixedname_name(&rbtdbiter->origin);
8165 dns_rbtnodechain_reset(&rbtdbiter->chain);
8166 dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8168 if (rbtdbiter->nsec3only) {
8169 rbtdbiter->current = &rbtdbiter->nsec3chain;
8170 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8173 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8174 } else if (rbtdbiter->nonsec3) {
8175 rbtdbiter->current = &rbtdbiter->chain;
8176 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8179 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8182 * Stay on main chain if not found on either chain.
8184 rbtdbiter->current = &rbtdbiter->chain;
8185 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8188 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8189 if (result == DNS_R_PARTIALMATCH) {
8190 dns_rbtnode_t *node = NULL;
8191 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8192 &node, &rbtdbiter->nsec3chain,
8193 DNS_RBTFIND_EMPTYDATA,
8195 if (result == ISC_R_SUCCESS) {
8196 rbtdbiter->node = node;
8197 rbtdbiter->current = &rbtdbiter->nsec3chain;
8203 if (result == ISC_R_SUCCESS) {
8204 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8206 if (result == ISC_R_SUCCESS) {
8207 rbtdbiter->new_origin = ISC_TRUE;
8208 reference_iter_node(rbtdbiter);
8210 } else if (result == DNS_R_PARTIALMATCH) {
8211 result = ISC_R_NOTFOUND;
8212 rbtdbiter->node = NULL;
8215 rbtdbiter->result = result;
8217 if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8218 isc_result_t tresult;
8219 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8221 if (tresult == ISC_R_SUCCESS) {
8222 rbtdbiter->new_origin = ISC_TRUE;
8223 reference_iter_node(rbtdbiter);
8226 rbtdbiter->node = NULL;
8229 rbtdbiter->node = NULL;
8231 rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8232 ISC_R_SUCCESS : result;
/*
 * Move the database iterator to the previous node.
 *
 * Requires a current node.  If the stored result is not ISC_R_SUCCESS
 * it is returned unchanged.  A paused iteration is resumed first.
 *
 * The active chain is stepped backwards.  If it is exhausted
 * (ISC_R_NOMORE), neither 'nsec3only' nor 'nonsec3' is set, and the
 * active chain is the nsec3 chain, iteration switches to the main
 * chain and continues from the last node of the main tree; an empty
 * main tree (ISC_R_NOTFOUND) is reported as ISC_R_NOMORE.
 *
 * The reference on the old node is dropped.  On DNS_R_NEWORIGIN or
 * ISC_R_SUCCESS, 'new_origin' records whether the origin changed and
 * the new current node is fetched; on success a reference is taken on
 * it.  The final result is stored in rbtdbiter->result.
 */
8239 dbiterator_prev(dns_dbiterator_t *iterator) {
8240 isc_result_t result;
8241 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8242 dns_name_t *name, *origin;
8243 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8245 REQUIRE(rbtdbiter->node != NULL);
8247 if (rbtdbiter->result != ISC_R_SUCCESS)
8248 return (rbtdbiter->result);
8250 if (rbtdbiter->paused)
8251 resume_iteration(rbtdbiter);
8253 name = dns_fixedname_name(&rbtdbiter->name);
8254 origin = dns_fixedname_name(&rbtdbiter->origin);
8255 result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8256 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8257 !rbtdbiter->nonsec3 &&
8258 &rbtdbiter->nsec3chain == rbtdbiter->current) {
8259 rbtdbiter->current = &rbtdbiter->chain;
8260 dns_rbtnodechain_reset(rbtdbiter->current);
8261 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8263 if (result == ISC_R_NOTFOUND)
8264 result = ISC_R_NOMORE;
8267 dereference_iter_node(rbtdbiter);
8269 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8270 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8271 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8272 NULL, &rbtdbiter->node);
8275 if (result == ISC_R_SUCCESS)
8276 reference_iter_node(rbtdbiter);
8278 rbtdbiter->result = result;
/*
 * Move the database iterator to the next node.
 *
 * Requires a current node.  If the stored result is not ISC_R_SUCCESS
 * it is returned unchanged.  A paused iteration is resumed first.
 *
 * The active chain is stepped forwards.  If it is exhausted
 * (ISC_R_NOMORE), neither 'nsec3only' nor 'nonsec3' is set, and the
 * active chain is the main chain, iteration switches to the nsec3
 * chain and continues from the first node of the nsec3 tree; an empty
 * nsec3 tree (ISC_R_NOTFOUND) is reported as ISC_R_NOMORE.
 *
 * The reference on the old node is dropped.  On DNS_R_NEWORIGIN or
 * ISC_R_SUCCESS, 'new_origin' records whether the origin changed and
 * the new current node is fetched; on success a reference is taken on
 * it.  The final result is stored in rbtdbiter->result.
 */
8284 dbiterator_next(dns_dbiterator_t *iterator) {
8285 isc_result_t result;
8286 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8287 dns_name_t *name, *origin;
8288 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8290 REQUIRE(rbtdbiter->node != NULL);
8292 if (rbtdbiter->result != ISC_R_SUCCESS)
8293 return (rbtdbiter->result);
8295 if (rbtdbiter->paused)
8296 resume_iteration(rbtdbiter);
8298 name = dns_fixedname_name(&rbtdbiter->name);
8299 origin = dns_fixedname_name(&rbtdbiter->origin);
8300 result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8301 if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8302 !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8303 rbtdbiter->current = &rbtdbiter->nsec3chain;
8304 dns_rbtnodechain_reset(rbtdbiter->current);
8305 result = dns_rbtnodechain_first(rbtdbiter->current,
8306 rbtdb->nsec3, name, origin);
8307 if (result == ISC_R_NOTFOUND)
8308 result = ISC_R_NOMORE;
8311 dereference_iter_node(rbtdbiter);
8313 if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8314 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8315 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8316 NULL, &rbtdbiter->node);
8318 if (result == ISC_R_SUCCESS)
8319 reference_iter_node(rbtdbiter);
8321 rbtdbiter->result = result;
/*
 * Return the node the database iterator is positioned on, and
 * optionally its name.
 *
 * Requires that the stored result is ISC_R_SUCCESS and that there is
 * a current node.  A paused iteration is resumed first.
 *
 * The name handling depends on 'relative_names'; the visible code
 * concatenates the node name and the origin into 'name' with
 * dns_name_concatenate().  When relative names are in use and the
 * origin has just changed, the result is DNS_R_NEWORIGIN, otherwise
 * ISC_R_SUCCESS.
 *
 * A new reference is taken on the node (under the node's strong lock)
 * and the node is stored in '*nodep'.
 *
 * When the iterator is in 'cleaning' mode and the result is
 * ISC_R_SUCCESS, the node is passed to expirenode().  If that succeeds
 * and the node has no 'down' pointer, the node is appended to the
 * deletions[] queue and its reference count is incremented; a full
 * queue (DELETION_BATCH_MAX entries) is flushed beforehand.
 */
8327 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8330 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8331 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8332 dns_rbtnode_t *node = rbtdbiter->node;
8333 isc_result_t result;
8334 dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8335 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8337 REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8338 REQUIRE(rbtdbiter->node != NULL);
8340 if (rbtdbiter->paused)
8341 resume_iteration(rbtdbiter);
8344 if (rbtdbiter->common.relative_names)
8346 result = dns_name_concatenate(nodename, origin, name, NULL);
8347 if (result != ISC_R_SUCCESS)
8349 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8350 result = DNS_R_NEWORIGIN;
8352 result = ISC_R_SUCCESS;
8354 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8355 new_reference(rbtdb, node);
8356 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8358 *nodep = rbtdbiter->node;
8360 if (iterator->cleaning && result == ISC_R_SUCCESS) {
8361 isc_result_t expire_result;
8364 * If the deletion array is full, flush it before trying
8365 * to expire the current node. The current node can't
8366 * fully deleted while the iteration cursor is still on it.
8368 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8369 flush_deletions(rbtdbiter);
8371 expire_result = expirenode(iterator->db, *nodep, 0);
8374 * expirenode() currently always returns success.
8376 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8379 rbtdbiter->deletions[rbtdbiter->delete++] = node;
8380 NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8381 dns_rbtnode_refincrement(node, &refs);
8383 NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
/*
 * Pause the database iteration, releasing the tree lock.
 *
 * If the stored result is neither ISC_R_SUCCESS nor ISC_R_NOMORE it
 * is returned unchanged.  Pausing an already paused iterator is a
 * no-op that returns ISC_R_SUCCESS.
 *
 * Otherwise the iterator is marked paused; a held tree lock (which
 * must be a read lock, INSISTed) is released, and queued deletions
 * are flushed.  Returns ISC_R_SUCCESS.
 */
8391 dbiterator_pause(dns_dbiterator_t *iterator) {
8392 dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8393 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8395 if (rbtdbiter->result != ISC_R_SUCCESS &&
8396 rbtdbiter->result != ISC_R_NOMORE)
8397 return (rbtdbiter->result);
8399 if (rbtdbiter->paused)
8400 return (ISC_R_SUCCESS);
8402 rbtdbiter->paused = ISC_TRUE;
8404 if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8405 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8406 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8407 rbtdbiter->tree_locked = isc_rwlocktype_none;
8410 flush_deletions(rbtdbiter);
8412 return (ISC_R_SUCCESS);
/*
 * Copy the iterator's current origin into 'name'.
 *
 * If the stored result is not ISC_R_SUCCESS it is returned unchanged;
 * otherwise the result of dns_name_copy() is returned.
 */
8416 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8417 rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8418 dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8420 if (rbtdbiter->result != ISC_R_SUCCESS)
8421 return (rbtdbiter->result);
8423 return (dns_name_copy(origin, name, NULL));
8427 * Additional cache routines.
/*
 * Look up the additional-section cache entry associated with the
 * rdata the rdataset is currently positioned on.
 *
 * NOTE(review): the early return of ISC_R_NOTIMPLEMENTED presumably
 * belongs to a build configuration selected by preprocessor
 * conditionals that are not visible here -- confirm.
 *
 * The slab header is located sizeof(*header) bytes before 'raw'
 * (rdataset->private3).  The total rdata count is read from the first
 * two bytes of the slab (high byte first) and the array index is
 * total_count - current_count - 1.
 *
 * With the node read lock held, the per-header array for 'type'
 * (additional_auth or additional_glue) is selected.  If there is no
 * array, or no entry at the index, ISC_R_NOTFOUND is returned and a
 * query miss is counted (for a missing array, only when 'type' is not
 * dns_rdatasetadditional_fromcache).  Otherwise the entry is attached,
 * the node lock is dropped, the cached data is fetched with
 * dns_acache_getentry() and the entry is detached again.
 */
8430 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8431 dns_rdatatype_t qtype, dns_acache_t *acache,
8432 dns_zone_t **zonep, dns_db_t **dbp,
8433 dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8434 dns_name_t *fname, dns_message_t *msg,
8450 return (ISC_R_NOTIMPLEMENTED);
8452 dns_rbtdb_t *rbtdb = rdataset->private1;
8453 dns_rbtnode_t *rbtnode = rdataset->private2;
8454 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8455 unsigned int current_count = rdataset->privateuint4;
8457 rdatasetheader_t *header;
8458 nodelock_t *nodelock;
8459 unsigned int total_count;
8460 acachectl_t *acarray;
8461 dns_acacheentry_t *entry;
8462 isc_result_t result;
8464 UNUSED(qtype); /* we do not use this value at least for now */
8467 header = (struct rdatasetheader *)(raw - sizeof(*header));
8469 total_count = raw[0] * 256 + raw[1];
8470 INSIST(total_count > current_count);
8471 count = total_count - current_count - 1;
8475 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8476 NODE_LOCK(nodelock, isc_rwlocktype_read);
8479 case dns_rdatasetadditional_fromauth:
8480 acarray = header->additional_auth;
8482 case dns_rdatasetadditional_fromcache:
8485 case dns_rdatasetadditional_fromglue:
8486 acarray = header->additional_glue;
8492 if (acarray == NULL) {
8493 if (type != dns_rdatasetadditional_fromcache)
8494 dns_acache_countquerymiss(acache);
8495 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8496 return (ISC_R_NOTFOUND);
8499 if (acarray[count].entry == NULL) {
8500 dns_acache_countquerymiss(acache);
8501 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8502 return (ISC_R_NOTFOUND);
8506 dns_acache_attachentry(acarray[count].entry, &entry);
8508 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8510 result = dns_acache_getentry(entry, zonep, dbp, versionp,
8511 nodep, fname, msg, now);
8513 dns_acache_detachentry(&entry);
/*
 * Callback registered with the additional cache for an entry created
 * by rdataset_setadditional().
 *
 * 'arg' must be non-NULL; the callback argument ('cbarg') identifies
 * the database, node, header, array type and array index the entry
 * was stored under.  The existing comment in the body states that the
 * caller must hold the entry lock.
 *
 * With the node write lock held, the array for cbarg->type is
 * selected.  If the slot at cbarg->count still refers to 'entry', the
 * slot is cleared, the callback argument is freed and the entry is
 * detached.  (An isc_mem_put() of 'cbarg' appears twice in the visible
 * code; presumably the second one is in an else branch whose keyword
 * is not visible here -- confirm.)
 *
 * After the node lock is released, the node and database references
 * held on behalf of the callback argument are dropped.
 */
8519 acache_callback(dns_acacheentry_t *entry, void **arg) {
8521 dns_rbtnode_t *rbtnode;
8522 nodelock_t *nodelock;
8523 acachectl_t *acarray = NULL;
8524 acache_cbarg_t *cbarg;
8527 REQUIRE(arg != NULL);
8531 * The caller must hold the entry lock.
8534 rbtdb = (dns_rbtdb_t *)cbarg->db;
8535 rbtnode = (dns_rbtnode_t *)cbarg->node;
8537 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8538 NODE_LOCK(nodelock, isc_rwlocktype_write);
8540 switch (cbarg->type) {
8541 case dns_rdatasetadditional_fromauth:
8542 acarray = cbarg->header->additional_auth;
8544 case dns_rdatasetadditional_fromglue:
8545 acarray = cbarg->header->additional_glue;
8551 count = cbarg->count;
8552 if (acarray != NULL && acarray[count].entry == entry) {
8553 acarray[count].entry = NULL;
8554 INSIST(acarray[count].cbarg == cbarg);
8555 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8556 acarray[count].cbarg = NULL;
8558 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8560 dns_acache_detachentry(&entry);
8562 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8564 dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8565 dns_db_detach((dns_db_t **)(void*)&rbtdb);
/*
 * Cancel an additional-cache entry and release its callback argument.
 *
 * 'mctx', 'entry', 'cbargp' and '*cbargp' must all be non-NULL.  The
 * entry is cancelled with dns_acache_cancelentry(), the node and
 * database references stored in the callback argument are dropped,
 * and the callback argument itself is returned to 'mctx'.
 */
8573 acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
8574 acache_cbarg_t **cbargp)
8576 acache_cbarg_t *cbarg;
8578 REQUIRE(mctx != NULL);
8579 REQUIRE(entry != NULL);
8580 REQUIRE(cbargp != NULL && *cbargp != NULL);
8584 dns_acache_cancelentry(entry);
8585 dns_db_detachnode(cbarg->db, &cbarg->node);
8586 dns_db_detach(&cbarg->db);
8588 isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));
/*
 * Store an additional-section cache entry for the rdata the rdataset
 * is currently positioned on.
 *
 * NOTE(review): the early return of ISC_R_NOTIMPLEMENTED presumably
 * belongs to a build configuration selected by preprocessor
 * conditionals that are not visible here -- confirm.
 *
 * Nothing is stored for dns_rdatasetadditional_fromcache; that case
 * returns ISC_R_SUCCESS immediately.
 *
 * The slab header is located sizeof(*header) bytes before 'raw', and
 * the array index is total_count - current_count - 1, where
 * total_count comes from the first two bytes of the slab.
 *
 * A callback argument is allocated (ISC_R_NOMEMORY on failure) and
 * given its own database and node references.  A new acache entry is
 * then created with acache_callback() as its callback and filled in
 * with dns_acache_setentry().
 *
 * With the node write lock held, the per-header array for 'type' is
 * looked up and, if absent, allocated with total_count slots, cleared
 * and attached to the header.  The new entry and callback argument
 * are stored in the slot; an entry already present there is
 * remembered and, once the node lock has been released, cancelled and
 * detached.
 *
 * Returns ISC_R_SUCCESS on success.  The trailing statements form the
 * failure cleanup path, which cancels/detaches the new entry if one
 * was created and releases the callback argument and its references.
 */
8595 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8596 dns_rdatatype_t qtype, dns_acache_t *acache,
8597 dns_zone_t *zone, dns_db_t *db,
8598 dns_dbversion_t *version, dns_dbnode_t *node,
8612 return (ISC_R_NOTIMPLEMENTED);
8614 dns_rbtdb_t *rbtdb = rdataset->private1;
8615 dns_rbtnode_t *rbtnode = rdataset->private2;
8616 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8617 unsigned int current_count = rdataset->privateuint4;
8618 rdatasetheader_t *header;
8619 unsigned int total_count, count;
8620 nodelock_t *nodelock;
8621 isc_result_t result;
8622 acachectl_t *acarray;
8623 dns_acacheentry_t *newentry, *oldentry = NULL;
8624 acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8628 if (type == dns_rdatasetadditional_fromcache)
8629 return (ISC_R_SUCCESS);
8631 header = (struct rdatasetheader *)(raw - sizeof(*header));
8633 total_count = raw[0] * 256 + raw[1];
8634 INSIST(total_count > current_count);
8635 count = total_count - current_count - 1; /* should be private data */
8637 newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8638 if (newcbarg == NULL)
8639 return (ISC_R_NOMEMORY);
8640 newcbarg->type = type;
8641 newcbarg->count = count;
8642 newcbarg->header = header;
8643 newcbarg->db = NULL;
8644 dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8645 newcbarg->node = NULL;
8646 dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8649 result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8650 acache_callback, newcbarg, &newentry);
8651 if (result != ISC_R_SUCCESS)
8653 /* Set cache data in the new entry. */
8654 result = dns_acache_setentry(acache, newentry, zone, db,
8655 version, node, fname);
8656 if (result != ISC_R_SUCCESS)
8659 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8660 NODE_LOCK(nodelock, isc_rwlocktype_write);
8664 case dns_rdatasetadditional_fromauth:
8665 acarray = header->additional_auth;
8667 case dns_rdatasetadditional_fromglue:
8668 acarray = header->additional_glue;
8674 if (acarray == NULL) {
8677 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8678 sizeof(acachectl_t));
8680 if (acarray == NULL) {
8681 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8685 for (i = 0; i < total_count; i++) {
8686 acarray[i].entry = NULL;
8687 acarray[i].cbarg = NULL;
8691 case dns_rdatasetadditional_fromauth:
8692 header->additional_auth = acarray;
8694 case dns_rdatasetadditional_fromglue:
8695 header->additional_glue = acarray;
8701 if (acarray[count].entry != NULL) {
8703 * Swap the entry. Delay cleaning-up the old entry since
8704 * it would require a node lock.
8706 oldentry = acarray[count].entry;
8707 INSIST(acarray[count].cbarg != NULL);
8708 oldcbarg = acarray[count].cbarg;
8710 acarray[count].entry = newentry;
8711 acarray[count].cbarg = newcbarg;
8713 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8715 if (oldentry != NULL) {
8716 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8717 dns_acache_detachentry(&oldentry);
8720 return (ISC_R_SUCCESS);
8723 if (newcbarg != NULL) {
8724 if (newentry != NULL) {
8725 acache_cancelentry(rbtdb->common.mctx, newentry,
8727 dns_acache_detachentry(&newentry);
8729 dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8730 dns_db_detach(&newcbarg->db);
8731 isc_mem_put(rbtdb->common.mctx, newcbarg,
/*
 * Remove the additional-section cache entry associated with the rdata
 * the rdataset is currently positioned on.
 *
 * NOTE(review): the early return of ISC_R_NOTIMPLEMENTED presumably
 * belongs to a build configuration selected by preprocessor
 * conditionals that are not visible here -- confirm.
 *
 * Nothing is stored for dns_rdatasetadditional_fromcache, so that
 * case returns ISC_R_SUCCESS immediately.
 *
 * The slab header and array index are derived as in
 * rdataset_getadditional(): the header sits sizeof(*header) bytes
 * before 'raw' and the index is total_count - current_count - 1.
 *
 * With the node write lock held, the array for 'type' is selected.
 * ISC_R_NOTFOUND is returned if there is no array or the slot is
 * empty.  Otherwise the slot is cleared under the lock and, after the
 * lock has been released, the entry is cancelled and detached.
 * Returns ISC_R_SUCCESS in that case.
 */
8741 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8742 dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8750 return (ISC_R_NOTIMPLEMENTED);
8752 dns_rbtdb_t *rbtdb = rdataset->private1;
8753 dns_rbtnode_t *rbtnode = rdataset->private2;
8754 unsigned char *raw = rdataset->private3; /* RDATASLAB */
8755 unsigned int current_count = rdataset->privateuint4;
8756 rdatasetheader_t *header;
8757 nodelock_t *nodelock;
8758 unsigned int total_count, count;
8759 acachectl_t *acarray;
8760 dns_acacheentry_t *entry;
8761 acache_cbarg_t *cbarg;
8763 UNUSED(qtype); /* we do not use this value at least for now */
8766 if (type == dns_rdatasetadditional_fromcache)
8767 return (ISC_R_SUCCESS);
8769 header = (struct rdatasetheader *)(raw - sizeof(*header));
8771 total_count = raw[0] * 256 + raw[1];
8772 INSIST(total_count > current_count);
8773 count = total_count - current_count - 1;
8778 nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8779 NODE_LOCK(nodelock, isc_rwlocktype_write);
8782 case dns_rdatasetadditional_fromauth:
8783 acarray = header->additional_auth;
8785 case dns_rdatasetadditional_fromglue:
8786 acarray = header->additional_glue;
8792 if (acarray == NULL) {
8793 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8794 return (ISC_R_NOTFOUND);
8797 entry = acarray[count].entry;
8798 if (entry == NULL) {
8799 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8800 return (ISC_R_NOTFOUND);
8803 acarray[count].entry = NULL;
8804 cbarg = acarray[count].cbarg;
8805 acarray[count].cbarg = NULL;
8807 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8809 if (entry != NULL) {
8811 acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8812 dns_acache_detachentry(&entry);
8815 return (ISC_R_SUCCESS);
8820 * Routines for LRU-based cache management.
8824 * See if a given cache entry that is being reused needs to be updated
8825 * in the LRU-list. From the LRU management point of view, this function is
8826 * expected to return true for almost all cases. When used with threads,
8827 * however, this may cause a non-negligible performance penalty because a
8828 * writer lock will have to be acquired before updating the list.
8829 * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8830 * function returns true if the entry has not been updated for some period of
8831 * time. We differentiate the NS or glue address case and the others since
8832 * experiments have shown that the former tends to be accessed relatively
8833 * infrequently and the cost of a cache miss is higher (e.g., missing NS records
8834 * may cause external queries at a higher level zone, involving more transactions).
8837 * Caller must hold the node (read or write) lock.
/*
 * Decide whether 'header' is due for an LRU update at time 'now'.
 *
 * Headers carrying RDATASET_ATTR_NONEXISTENT or RDATASET_ATTR_STALE
 * are tested for first (the result returned for them is not visible
 * here -- presumably "no update"; confirm).
 *
 * When DNS_RBTDB_LIMITLRUUPDATE is non-zero, updates are rate
 * limited: NS headers, and A/AAAA headers with glue trust, are due
 * once 60 seconds have passed since 'last_used'; all other headers
 * are due once 300 seconds have passed.
 */
8839 static inline isc_boolean_t
8840 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8841 if ((header->attributes &
8842 (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8845 #if DNS_RBTDB_LIMITLRUUPDATE
8846 if (header->type == dns_rdatatype_ns ||
8847 (header->trust == dns_trust_glue &&
8848 (header->type == dns_rdatatype_a ||
8849 header->type == dns_rdatatype_aaaa))) {
8851 * Glue records are updated if at least 60 seconds have passed
8852 * since the previous update time.
8854 return (header->last_used + 60 <= now);
8857 /* Other records are updated if 5 minutes have passed. */
8858 return (header->last_used + 300 <= now);
8867 * Update the timestamp of a given cache entry and move it to the head
8868 * of the corresponding LRU list.
8870 * Caller must hold the node (write) lock.
8872 * Note that we do NOT touch the heap here, as the TTL has not changed.
/*
 * Record 'now' as the header's last-used time and move the header to
 * the front of the LRU list for its node's lock bucket
 * (rbtdb->rdatasets[header->node->locknum]).
 *
 * Only valid for cache databases, and the header must already be on
 * an LRU list (both INSISTed).
 */
8875 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8878 INSIST(IS_CACHE(rbtdb));
8880 /* To be checked: can we really assume this? XXXMLG */
8881 INSIST(ISC_LINK_LINKED(header, link));
8883 ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8884 header->last_used = now;
8885 ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8889 * Purge some expired and/or stale (i.e. unused for some period) cache entries
8890 * under an overmem condition. To recover from this condition quickly, up to
8891 * 2 entries will be purged. This process is triggered while adding a new
8892 * entry, and we specifically avoid purging entries in the same LRU bucket as
8893 * the one to which the new entry will belong. Otherwise, we might purge
8894 * entries of the same name of different RR types while adding RRsets from a
8895 * single response (consider the case where we're adding A and AAAA glue records
8896 * of the same NS name).
/*
 * Purge cache entries from lock buckets other than 'locknum_start'.
 *
 * Buckets are visited in order starting at locknum_start + 1 and
 * wrapping modulo node_lock_count; the walk stops when it returns to
 * 'locknum_start' or when 'purgecount' reaches zero (its initial value
 * and the places where it is decremented are not visible here).
 *
 * For each bucket, with the node lock held in write mode:
 *   - the first element of the bucket's TTL heap is expired if its
 *     rdh_ttl is <= now - RBTDB_VIRTUAL;
 *   - the bucket's LRU list is walked from the tail, unlinking and
 *     expiring entries while 'purgecount' remains positive.
 *
 * 'tree_locked' is passed through to expire_header().
 */
8899 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8900 isc_stdtime_t now, isc_boolean_t tree_locked)
8902 rdatasetheader_t *header, *header_prev;
8903 unsigned int locknum;
8906 for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8907 locknum != locknum_start && purgecount > 0;
8908 locknum = (locknum + 1) % rbtdb->node_lock_count) {
8909 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8910 isc_rwlocktype_write);
8912 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8913 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8914 expire_header(rbtdb, header, tree_locked);
8918 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8919 header != NULL && purgecount > 0;
8920 header = header_prev) {
8921 header_prev = ISC_LIST_PREV(header, link);
8923 * Unlink the entry at this point to avoid checking it
8924 * again even if it's currently used someone else and
8925 * cannot be purged at this moment. This entry won't be
8926 * referenced any more (so unlinking is safe) since the
8927 * TTL was reset to 0.
8929 ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8931 expire_header(rbtdb, header, tree_locked);
8935 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8936 isc_rwlocktype_write);
8941 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8942 isc_boolean_t tree_locked)
8944 set_ttl(rbtdb, header, 0);
8945 header->attributes |= RDATASET_ATTR_STALE;
8946 header->node->dirty = 1;
8949 * Caller must hold the node (write) lock.
8952 if (dns_rbtnode_refcurrent(header->node) == 0) {
8954 * If no one else is using the node, we can clean it up now.
8955 * We first need to gain a new reference to the node to meet a
8956 * requirement of decrement_reference().
8958 new_reference(rbtdb, header->node);
8959 decrement_reference(rbtdb, header->node, 0,
8960 isc_rwlocktype_write,
8961 tree_locked ? isc_rwlocktype_write :
8962 isc_rwlocktype_none, ISC_FALSE);