update to 9.7.1-P2
[tridge/bind9.git] / lib / dns / rbtdb.c
1 /*
2  * Copyright (C) 2004-2010  Internet Systems Consortium, Inc. ("ISC")
3  * Copyright (C) 1999-2003  Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and/or distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10  * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11  * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12  * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13  * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14  * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15  * PERFORMANCE OF THIS SOFTWARE.
16  */
17
18 /* $Id: rbtdb.c,v 1.292.8.9 2010/05/10 01:41:11 marka Exp $ */
19
20 /*! \file */
21
22 /*
23  * Principal Author: Bob Halley
24  */
25
26 #include <config.h>
27
28 /* #define inline */
29
30 #include <isc/event.h>
31 #include <isc/heap.h>
32 #include <isc/mem.h>
33 #include <isc/mutex.h>
34 #include <isc/platform.h>
35 #include <isc/print.h>
36 #include <isc/random.h>
37 #include <isc/refcount.h>
38 #include <isc/rwlock.h>
39 #include <isc/serial.h>
40 #include <isc/string.h>
41 #include <isc/task.h>
42 #include <isc/time.h>
43 #include <isc/util.h>
44
45 #include <dns/acache.h>
46 #include <dns/db.h>
47 #include <dns/dbiterator.h>
48 #include <dns/events.h>
49 #include <dns/fixedname.h>
50 #include <dns/lib.h>
51 #include <dns/log.h>
52 #include <dns/masterdump.h>
53 #include <dns/nsec.h>
54 #include <dns/nsec3.h>
55 #include <dns/rbt.h>
56 #include <dns/rdata.h>
57 #include <dns/rdataset.h>
58 #include <dns/rdatasetiter.h>
59 #include <dns/rdataslab.h>
60 #include <dns/rdatastruct.h>
61 #include <dns/result.h>
62 #include <dns/stats.h>
63 #include <dns/view.h>
64 #include <dns/zone.h>
65 #include <dns/zonekey.h>
66
67 #ifdef DNS_RBTDB_VERSION64
68 #include "rbtdb64.h"
69 #else
70 #include "rbtdb.h"
71 #endif
72
/*
 * Implementation magic number, compared against common.impmagic by
 * VALID_RBTDB().  Builds with DNS_RBTDB_VERSION64 defined use 'RBD8';
 * all other builds use 'RBD4'.
 */
73 #ifdef DNS_RBTDB_VERSION64
74 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
75 #else
76 #define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
77 #endif
78
79 /*%
80  * Note that "impmagic" is not the first four bytes of the struct, so
81  * ISC_MAGIC_VALID cannot be used.
    *
    * VALID_RBTDB(rbtdb) is true when 'rbtdb' is non-NULL and its
    * common.impmagic equals RBTDB_MAGIC.  'rbtdb' is evaluated twice.
82  */
83 #define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
84                                  (rbtdb)->common.impmagic == RBTDB_MAGIC)
85
/*
 * rbtdb_serial_t is the integer type used for version serial numbers:
 * 64 bits wide when DNS_RBTDB_VERSION64 is defined, 32 bits otherwise.
 */
86 #ifdef DNS_RBTDB_VERSION64
87 typedef isc_uint64_t                    rbtdb_serial_t;
88 /*%
89  * Make casting easier in symbolic debuggers by using different names
90  * for the 64 bit version.
91  */
92 #define dns_rbtdb_t dns_rbtdb64_t
93 #define rdatasetheader_t rdatasetheader64_t
94 #define rbtdb_version_t rbtdb_version64_t
95 #else
96 typedef isc_uint32_t                    rbtdb_serial_t;
97 #endif
98
/*
 * A 32-bit value that the RBTDB_RDATATYPE_BASE(), RBTDB_RDATATYPE_EXT() and
 * RBTDB_RDATATYPE_VALUE() macros split into / build from a low 16-bit "base"
 * part and a high 16-bit "ext" part.
 */
99 typedef isc_uint32_t                    rbtdb_rdatatype_t;
100
/*
 * An rbtdb_rdatatype_t packs two rdata type values into 32 bits: the "base"
 * type in the low 16 bits and the "extended" type in the high 16 bits.
 *
 * RBTDB_RDATATYPE_BASE(type)   extracts the low 16 bits.
 * RBTDB_RDATATYPE_EXT(type)    extracts the high 16 bits.
 * RBTDB_RDATATYPE_VALUE(b, e)  builds the packed value from base 'b' and
 *                              extension 'e'.
 *
 * 'e' is widened to rbtdb_rdatatype_t *before* it is shifted.  Shifting the
 * raw argument would happen in (promoted) signed int, so any extension type
 * of 0x8000 or above would overflow the shift, which is undefined behaviour.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(b, e) \
        ((((rbtdb_rdatatype_t)(e)) << 16) | (rbtdb_rdatatype_t)(b))
104
/*
 * Pre-built packed type values.  Each RBTDB_RDATATYPE_SIG* constant has
 * dns_rdatatype_rrsig as its base part and the named type (NSEC, NSEC3, NS,
 * CNAME, DNAME) as its extension part.  RBTDB_RDATATYPE_NCACHEANY has a base
 * part of 0 and dns_rdatatype_any as its extension part.
 */
105 #define RBTDB_RDATATYPE_SIGNSEC \
106                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 #define RBTDB_RDATATYPE_SIGNSEC3 \
108                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
109 #define RBTDB_RDATATYPE_SIGNS \
110                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
111 #define RBTDB_RDATATYPE_SIGCNAME \
112                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113 #define RBTDB_RDATATYPE_SIGDNAME \
114                 RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115 #define RBTDB_RDATATYPE_NCACHEANY \
116                 RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
117
118 /*
119  * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is defined.
120  * Using rwlock is effective with regard to lookup performance only when
121  * it is implemented in an efficient way.
122  * Otherwise, it is generally wise to stick to the simple locking since rwlock
123  * would require more memory or can even make lookups slower due to its own
124  * overhead (when it internally calls mutex locks).
125  */
126 #ifdef ISC_RWLOCK_USEATOMIC
127 #define DNS_RBTDB_USERWLOCK 1
128 #else
129 #define DNS_RBTDB_USERWLOCK 0
130 #endif
131
/*
 * Database lock wrappers.  In the rwlock build the lock type 't' is passed
 * through to RWLOCK()/RWUNLOCK(); in the mutex build 't' is ignored and the
 * lock is always a plain LOCK()/UNLOCK().
 */
132 #if DNS_RBTDB_USERWLOCK
133 #define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
134 #define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
135 #define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
136 #define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
137 #else
138 #define RBTDB_INITLOCK(l)       isc_mutex_init(l)
139 #define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
140 #define RBTDB_LOCK(l, t)        LOCK(l)
141 #define RBTDB_UNLOCK(l, t)      UNLOCK(l)
142 #endif
143
144 /*
145  * Since node locking is sensitive to both performance and memory footprint,
146  * we need a trick here.  If we have both high-performance rwlock and
147  * high performance and small-memory reference counters, we use rwlock for
148  * node lock and isc_refcount for node references.  In this case, we don't have
149  * to protect the access to the counters by locks.
150  * Otherwise, we simply use ordinary mutex lock for node locking, and use
151  * simple integers as reference counters which are protected by the lock.
152  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
153  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
154  * counters first and then protect other parts of a node as read-only data.
155  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
156  * provided for these special cases.  When we can use the efficient backend
157  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
158  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
159  * section including the access to the reference counter.
160  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
161  * section is also protected by NODE_STRONGLOCK().
162  */
163 #if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
/*
 * rwlock build: the STRONG macros expand to nothing and the WEAK macros
 * map onto the node rwlock.
 */
164 typedef isc_rwlock_t nodelock_t;
165
166 #define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
167 #define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
168 #define NODE_LOCK(l, t)         RWLOCK((l), (t))
169 #define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
170 #define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)
171
172 #define NODE_STRONGLOCK(l)      ((void)0)
173 #define NODE_STRONGUNLOCK(l)    ((void)0)
174 #define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
175 #define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
176 #define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
177 #else
/*
 * Mutex build: the lock type 't' is ignored, the STRONG macros take and
 * release the node mutex, the WEAK macros expand to nothing, and
 * NODE_TRYUPGRADE() always evaluates to ISC_R_SUCCESS.
 */
178 typedef isc_mutex_t nodelock_t;
179
180 #define NODE_INITLOCK(l)        isc_mutex_init(l)
181 #define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
182 #define NODE_LOCK(l, t)         LOCK(l)
183 #define NODE_UNLOCK(l, t)       UNLOCK(l)
184 #define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS
185
186 #define NODE_STRONGLOCK(l)      LOCK(l)
187 #define NODE_STRONGUNLOCK(l)    UNLOCK(l)
188 #define NODE_WEAKLOCK(l, t)     ((void)0)
189 #define NODE_WEAKUNLOCK(l, t)   ((void)0)
190 #define NODE_WEAKDOWNGRADE(l)   ((void)0)
191 #endif
192
193 /*%
194  * Whether to rate-limit updating the LRU to avoid possible thread contention.
195  * Our performance measurement has shown the cost is marginal, so it's defined
196  * to be 0 by default either with or without threads.
    * The #ifndef guard lets a build supply its own value.
197  */
198 #ifndef DNS_RBTDB_LIMITLRUUPDATE
199 #define DNS_RBTDB_LIMITLRUUPDATE 0
200 #endif
201
202 /*
203  * Allow clients with a virtual time of up to 5 minutes in the past to see
204  * records that would otherwise have expired.  The value is in seconds.
205  */
206 #define RBTDB_VIRTUAL 300
207
/*
 * Record hung off rdatasetheader_t via its 'noqname' and 'closest' pointers.
 * NOTE(review): 'neg' and 'negsig' are untyped here; presumably they point
 * at the stored proof rdataset and its signature, with 'type' naming the
 * proof's rdata type -- confirm against the code that fills them in.
 */
208 struct noqname {
209         dns_name_t      name;
210         void *          neg;
211         void *          negsig;
212         dns_rdatatype_t type;
213 };
214
215 typedef struct acachectl acachectl_t;
216
/*
 * Per-rdataset header.  Headers of one node are chained through 'next'
 * (one chain entry per type) and 'down' (older versions of the same type).
 */
217 typedef struct rdatasetheader {
218         /*%
219          * Locked by the owning node's lock.
220          */
221         rbtdb_serial_t                  serial;
222         dns_ttl_t                       rdh_ttl;
223         rbtdb_rdatatype_t               type;
224         isc_uint16_t                    attributes;
225         dns_trust_t                     trust;
226         struct noqname                  *noqname;
227         struct noqname                  *closest;
228         /*%<
229          * The 'next' and 'down' links below are hand-rolled.  We don't use
230          * the LIST macros, because the LIST structure has both head and
             * tail pointers, and is doubly linked.
231          */
232
233         struct rdatasetheader           *next;
234         /*%<
235          * If this is the top header for an rdataset, 'next' points
236          * to the top header for the next rdataset (i.e., the next type).
237          * Otherwise, it points up to the header whose down pointer points
238          * at this header.
239          */
240
241         struct rdatasetheader           *down;
242         /*%<
243          * Points to the header for the next older version of
244          * this rdataset.
245          */
246
247         isc_uint32_t                    count;
248         /*%<
249          * Monotonically increased every time this rdataset is bound so that
250          * it is used as the base of the starting point in DNS responses
251          * when the "cyclic" rrset-order is required.  Since the ordering
252          * should not be so crucial, no lock is set for the counter for
253          * performance reasons.
254          */
255
256         acachectl_t                     *additional_auth;
257         acachectl_t                     *additional_glue;
258
259         dns_rbtnode_t                   *node;
260         isc_stdtime_t                   last_used;
261         ISC_LINK(struct rdatasetheader) link;
262
263         unsigned int                    heap_index;
264         /*%<
265          * Used for TTL-based cache cleaning.  Written by set_index();
             * set_ttl() treats a value of 0 as "nothing to adjust in a heap".
266          */
267         isc_stdtime_t                   resign;
            /*%<
             * Ordering key compared by resign_sooner().
             */
268 } rdatasetheader_t;
269
/* List-head types for lists of rdataset headers and of tree nodes. */
270 typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
271 typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;
272
/*
 * Bit flags kept in rdatasetheader_t.attributes.  Most of them have a
 * matching predicate macro (EXISTS(), NONEXISTENT(), IGNORE(), RETAIN(),
 * NXDOMAIN(), RESIGN(), OPTOUT()).
 */
273 #define RDATASET_ATTR_NONEXISTENT       0x0001
274 #define RDATASET_ATTR_STALE             0x0002
275 #define RDATASET_ATTR_IGNORE            0x0004
276 #define RDATASET_ATTR_RETAIN            0x0008
277 #define RDATASET_ATTR_NXDOMAIN          0x0010
278 #define RDATASET_ATTR_RESIGN            0x0020
279 #define RDATASET_ATTR_STATCOUNT         0x0040
280 #define RDATASET_ATTR_OPTOUT            0x0080
281
/*
 * Argument bundle referenced from struct acachectl.
 * NOTE(review): presumably handed to an additional-cache callback, as the
 * name suggests; the registration code is outside this excerpt.
 */
282 typedef struct acache_cbarg {
283         dns_rdatasetadditional_t        type;
284         unsigned int                    count;
285         dns_db_t                        *db;
286         dns_dbnode_t                    *node;
287         rdatasetheader_t                *header;
288 } acache_cbarg_t;
289
/*
 * Pairs an acache entry with its acache_cbarg_t.  rdatasetheader_t holds
 * pointers to these in 'additional_auth' and 'additional_glue'.
 */
290 struct acachectl {
291         dns_acacheentry_t               *entry;
292         acache_cbarg_t                  *cbarg;
293 };
294
295 /*
296  * XXX
297  * When the cache pre-expires data (due to memory low or other
298  * situations) before the rdataset's TTL has expired, it MUST
299  * respect the RETAIN bit and not expire the data until its TTL has
300  * expired.
301  */
302
303 #undef IGNORE                   /* WIN32 winbase.h defines this. */
304
/*
 * Predicates over (header)->attributes.  EXISTS() is true when
 * RDATASET_ATTR_NONEXISTENT is clear; every other macro is true when the
 * flag of the same name is set.
 */
305 #define EXISTS(header) \
306         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
307 #define NONEXISTENT(header) \
308         (((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
309 #define IGNORE(header) \
310         (((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
311 #define RETAIN(header) \
312         (((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
313 #define NXDOMAIN(header) \
314         (((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
315 #define RESIGN(header) \
316         (((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
317 #define OPTOUT(header) \
318         (((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
319
/* Default number of node lock buckets. */
320 #define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
321
322 /*%
323  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
324  * There is a tradeoff issue about configuring this value: if this is too
325  * small, it may cause heavier contention between threads; if this is too large,
326  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
327  * The default value should work well for most environments, but this can
328  * also be configurable at compilation time via the
329  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
330  * 1 due to the assumption of overmem_purge().  A compile-time override of
    * 1 or less is rejected with #error.
331  */
332 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
333 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
334 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
335 #else
336 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
337 #endif
338 #else
339 #define DEFAULT_CACHE_NODE_LOCK_COUNT   16
340 #endif  /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
341
/*
 * One node lock bucket: the lock itself, a reference counter and an
 * 'exiting' flag.  dns_rbtdb_t.node_locks points at these.
 */
342 typedef struct {
343         nodelock_t                      lock;
344         /* Protected in the refcount routines. */
345         isc_refcount_t                  references;
346         /* Locked by lock. */
347         isc_boolean_t                   exiting;
348 } rbtdb_nodelock_t;
349
/*
 * Entry of a version's 'changed_list' (see rbtdb_version_t): a node plus a
 * 'dirty' flag.
 */
350 typedef struct rbtdb_changed {
351         dns_rbtnode_t *                 node;
352         isc_boolean_t                   dirty;
353         ISC_LINK(struct rbtdb_changed)  link;
354 } rbtdb_changed_t;
355
356 typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
357
/*
 * Security state recorded per version in rbtdb_version_t.secure.
 * NOTE(review): presumably the DNSSEC status of the zone (unsigned,
 * partially signed, fully signed) -- confirm where 'secure' is assigned.
 */
358 typedef enum {
359         dns_db_insecure,
360         dns_db_partial,
361         dns_db_secure
362 } dns_db_secure_t;
363
/*
 * One database version: its serial, reference count, writer/commit state,
 * the lists of changed nodes and resigned headers, and the NSEC3 parameters
 * recorded for it.
 */
364 typedef struct rbtdb_version {
365         /* Not locked */
366         rbtdb_serial_t                  serial;
367         /*
368          * Protected in the refcount routines.
369          * XXXJT: should we change the lock policy based on the refcount
370          * performance?
371          */
372         isc_refcount_t                  references;
373         /* Locked by database lock. */
374         isc_boolean_t                   writer;
375         isc_boolean_t                   commit_ok;
376         rbtdb_changedlist_t             changed_list;
377         rdatasetheaderlist_t            resigned_list;
378         ISC_LINK(struct rbtdb_version)  link;
379         dns_db_secure_t                 secure;
380         isc_boolean_t                   havensec3;
381         /* NSEC3 parameters */
382         dns_hash_t                      hash;
383         isc_uint8_t                     flags;
384         isc_uint16_t                    iterations;
385         isc_uint8_t                     salt_length;
386         unsigned char                   salt[DNS_NSEC3_SALTSIZE];
387 } rbtdb_version_t;
388
389 typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;
390
/*
 * The red-black-tree database object.  'common' is the generic dns_db_t
 * part; the rest is grouped by the lock that protects it, as noted inline.
 */
391 typedef struct {
392         /* Unlocked. */
393         dns_db_t                        common;
394 #if DNS_RBTDB_USERWLOCK
395         isc_rwlock_t                    lock;
396 #else
397         isc_mutex_t                     lock;
398 #endif
399         isc_rwlock_t                    tree_lock;
400         unsigned int                    node_lock_count;
401         rbtdb_nodelock_t *              node_locks;
402         dns_rbtnode_t *                 origin_node;
403         dns_stats_t *                   rrsetstats; /* cache DB only */
404         /* Locked by lock. */
405         unsigned int                    active;
406         isc_refcount_t                  references;
407         unsigned int                    attributes;
408         rbtdb_serial_t                  current_serial;
409         rbtdb_serial_t                  least_serial;
410         rbtdb_serial_t                  next_serial;
411         rbtdb_version_t *               current_version;
412         rbtdb_version_t *               future_version;
413         rbtdb_versionlist_t             open_versions;
414         isc_boolean_t                   overmem;
415         isc_task_t *                    task;
416         dns_dbnode_t                    *soanode;
417         dns_dbnode_t                    *nsnode;
418
419         /*
420          * This is an array of linked lists used to implement the LRU cache.
421          * There will be node_lock_count linked lists here.  Nodes in bucket 1
422          * will be placed on the linked list rdatasets[1].
423          */
424         rdatasetheaderlist_t            *rdatasets;
425
426         /*%
427          * Temporary storage for stale cache nodes and dynamically deleted
428          * nodes that await being cleaned up.  Indexed by lock bucket
             * (0 .. node_lock_count - 1).
429          */
430         rbtnodelist_t                   *deadnodes;
431
432         /*
433          * Heaps.  Each of these is used for TTL based expiry.  set_ttl()
             * indexes this array with the owning node's 'locknum'.
434          */
435         isc_heap_t                      **heaps;
436
437         /* Locked by tree_lock. */
438         dns_rbt_t *                     tree;
439         dns_rbt_t *                     nsec;
440         dns_rbt_t *                     nsec3;
441
442         /* Unlocked */
443         unsigned int                    quantum;
444 } dns_rbtdb_t;
445
/* NOTE(review): presumably bit flags for dns_rbtdb_t.attributes; confirm. */
446 #define RBTDB_ATTR_LOADED               0x01
447 #define RBTDB_ATTR_LOADING              0x02
448
449 /*%
450  * Search Context
    *
    * Bundles the database, version/serial, lookup options, node chain and
    * zone-cut bookkeeping for one search.
    * NOTE(review): the routines that fill in and consume these fields are
    * outside this excerpt; see them for exact field semantics.
451  */
452 typedef struct {
453         dns_rbtdb_t *           rbtdb;
454         rbtdb_version_t *       rbtversion;
455         rbtdb_serial_t          serial;
456         unsigned int            options;
457         dns_rbtnodechain_t      chain;
458         isc_boolean_t           copy_name;
459         isc_boolean_t           need_cleanup;
460         isc_boolean_t           wild;
461         dns_rbtnode_t *         zonecut;
462         rdatasetheader_t *      zonecut_rdataset;
463         rdatasetheader_t *      zonecut_sigrdataset;
464         dns_fixedname_t         zonecut_name;
465         isc_stdtime_t           now;
466 } rbtdb_search_t;
467
468 /*%
469  * Load Context
    *
    * Holds the target database and a timestamp.
    * NOTE(review): presumably 'now' is the time the load started; confirm
    * against the loading routines.
470  */
471 typedef struct {
472         dns_rbtdb_t *           rbtdb;
473         isc_stdtime_t           now;
474 } rbtdb_load_t;
475
476 static void rdataset_disassociate(dns_rdataset_t *rdataset);
477 static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
478 static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
479 static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
480 static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
481 static unsigned int rdataset_count(dns_rdataset_t *rdataset);
482 static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
483                                         dns_name_t *name,
484                                         dns_rdataset_t *neg,
485                                         dns_rdataset_t *negsig);
486 static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
487                                         dns_name_t *name,
488                                         dns_rdataset_t *neg,
489                                         dns_rdataset_t *negsig);
490 static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
491                                            dns_rdatasetadditional_t type,
492                                            dns_rdatatype_t qtype,
493                                            dns_acache_t *acache,
494                                            dns_zone_t **zonep,
495                                            dns_db_t **dbp,
496                                            dns_dbversion_t **versionp,
497                                            dns_dbnode_t **nodep,
498                                            dns_name_t *fname,
499                                            dns_message_t *msg,
500                                            isc_stdtime_t now);
501 static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
502                                            dns_rdatasetadditional_t type,
503                                            dns_rdatatype_t qtype,
504                                            dns_acache_t *acache,
505                                            dns_zone_t *zone,
506                                            dns_db_t *db,
507                                            dns_dbversion_t *version,
508                                            dns_dbnode_t *node,
509                                            dns_name_t *fname);
510 static isc_result_t rdataset_putadditional(dns_acache_t *acache,
511                                            dns_rdataset_t *rdataset,
512                                            dns_rdatasetadditional_t type,
513                                            dns_rdatatype_t qtype);
514 static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
515                                               isc_stdtime_t now);
516 static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
517                           isc_stdtime_t now);
518 static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
519                           isc_boolean_t tree_locked);
520 static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
521                           isc_stdtime_t now, isc_boolean_t tree_locked);
522 static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
523                                   rdatasetheader_t *newheader);
524 static void prune_tree(isc_task_t *task, isc_event_t *event);
525 static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
526 static void rdataset_expire(dns_rdataset_t *rdataset);
527
/*
 * Method table that plugs this file's static rdataset_* functions into
 * dns_rdatasetmethods_t.  Entries are positional, so their order must match
 * the member order of that structure.
 * NOTE(review): the two NULL slots are methods this implementation leaves
 * unset; check dns_rdatasetmethods_t for which members they are.
 */
528 static dns_rdatasetmethods_t rdataset_methods = {
529         rdataset_disassociate,
530         rdataset_first,
531         rdataset_next,
532         rdataset_current,
533         rdataset_clone,
534         rdataset_count,
535         NULL,
536         rdataset_getnoqname,
537         NULL,
538         rdataset_getclosest,
539         rdataset_getadditional,
540         rdataset_setadditional,
541         rdataset_putadditional,
542         rdataset_settrust,
543         rdataset_expire
544 };
545
546 static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
547 static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
548 static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
549 static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
550                                  dns_rdataset_t *rdataset);
551
/*
 * Method table for rdataset iterators: destroy, first, next, current.
 * Entries are positional.
 */
552 static dns_rdatasetitermethods_t rdatasetiter_methods = {
553         rdatasetiter_destroy,
554         rdatasetiter_first,
555         rdatasetiter_next,
556         rdatasetiter_current
557 };
558
/*
 * rdataset iterator state: the generic dns_rdatasetiter_t part followed by
 * a pointer to the header the iterator is currently positioned on.
 */
559 typedef struct rbtdb_rdatasetiter {
560         dns_rdatasetiter_t              common;
561         rdatasetheader_t *              current;
562 } rbtdb_rdatasetiter_t;
563
564 static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
565 static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
566 static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
567 static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
568                                         dns_name_t *name);
569 static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
570 static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
571 static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
572                                            dns_dbnode_t **nodep,
573                                            dns_name_t *name);
574 static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
575 static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
576                                           dns_name_t *name);
577
/*
 * Method table for database iterators.  Entries are positional: destroy,
 * first, last, seek, prev, next, current, pause, origin.
 */
578 static dns_dbiteratormethods_t dbiterator_methods = {
579         dbiterator_destroy,
580         dbiterator_first,
581         dbiterator_last,
582         dbiterator_seek,
583         dbiterator_prev,
584         dbiterator_next,
585         dbiterator_current,
586         dbiterator_pause,
587         dbiterator_origin
588 };
589
/* Capacity of the 'deletions' array in rbtdb_dbiterator_t. */
590 #define DELETION_BATCH_MAX 64
591
592 /*
593  * If 'paused' is ISC_TRUE, then the tree lock is not being held.
    *
    * Database iterator state.  'chain' and 'nsec3chain' are the two node
    * chains and 'current' points at one of them (or is unset).
    * NOTE(review): presumably 'delete' counts the used slots of
    * 'deletions'; confirm in the iterator routines.
594  */
595 typedef struct rbtdb_dbiterator {
596         dns_dbiterator_t                common;
597         isc_boolean_t                   paused;
598         isc_boolean_t                   new_origin;
599         isc_rwlocktype_t                tree_locked;
600         isc_result_t                    result;
601         dns_fixedname_t                 name;
602         dns_fixedname_t                 origin;
603         dns_rbtnodechain_t              chain;
604         dns_rbtnodechain_t              nsec3chain;
605         dns_rbtnodechain_t              *current;
606         dns_rbtnode_t                   *node;
607         dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
608         int                             delete;
609         isc_boolean_t                   nsec3only;
610         isc_boolean_t                   nonsec3;
611 } rbtdb_dbiterator_t;
612
613
/*
 * True when the database's common.attributes has DNS_DBATTR_STUB /
 * DNS_DBATTR_CACHE set, respectively.
 */
614 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
615 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
616
/* Forward declarations for functions used before their definitions. */
617 static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
618                        isc_event_t *event);
619 static void overmem(dns_db_t *db, isc_boolean_t overmem);
620 #ifdef BIND9
621 static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
622 #endif
623
624 /*%
625  * 'init_count' is used to initialize 'newheader->count', which in turn
626  * is used to determine where in the cycle the "cyclic" rrset-order starts.
627  * We don't lock this as we don't care about simultaneous updates.
628  *
629  * Note:
630  *      Both init_count and header->count can be ISC_UINT32_MAX.
631  *      The count on the returned rdataset however can't be as
632  *      that indicates that the database does not implement cyclic
633  *      processing.
634  */
635 static unsigned int init_count;
636
637 /*
638  * Locking
639  *
640  * If a routine is going to lock more than one lock in this module, then
641  * the locking must be done in the following order:
642  *
643  *      Tree Lock
644  *
645  *      Node Lock       (Only one from the set may be locked at one time by
646  *                       any caller)
647  *
648  *      Database Lock
649  *
650  * Failure to follow this hierarchy can result in deadlock.
651  */
652
653 /*
654  * Deleting Nodes
655  *
656  * For zone databases the node for the origin of the zone MUST NOT be deleted.
657  */
658
659
660 /*
661  * DB Routines
662  */
663
664 static void
665 attach(dns_db_t *source, dns_db_t **targetp) {
666         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
667
668         REQUIRE(VALID_RBTDB(rbtdb));
669
670         isc_refcount_increment(&rbtdb->references, NULL);
671
672         *targetp = source;
673 }
674
675 static void
676 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
677         dns_rbtdb_t *rbtdb = event->ev_arg;
678
679         UNUSED(task);
680
681         free_rbtdb(rbtdb, ISC_TRUE, event);
682 }
683
684 static void
685 update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
686                   isc_boolean_t increment)
687 {
688         dns_rdatastatstype_t statattributes = 0;
689         dns_rdatastatstype_t base = 0;
690         dns_rdatastatstype_t type;
691
692         /* At the moment we count statistics only for cache DB */
693         INSIST(IS_CACHE(rbtdb));
694
695         if (NXDOMAIN(header))
696                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
697         else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
698                 statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
699                 base = RBTDB_RDATATYPE_EXT(header->type);
700         } else
701                 base = RBTDB_RDATATYPE_BASE(header->type);
702
703         type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
704         if (increment)
705                 dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
706         else
707                 dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
708 }
709
710 static void
711 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
712         int idx;
713         isc_heap_t *heap;
714         dns_ttl_t oldttl;
715
716         oldttl = header->rdh_ttl;
717         header->rdh_ttl = newttl;
718
719         if (!IS_CACHE(rbtdb))
720                 return;
721
722         /*
723          * It's possible the rbtdb is not a cache.  If this is the case,
724          * we will not have a heap, and we move on.  If we do, though,
725          * we might need to adjust things.
726          */
727         if (header->heap_index == 0 || newttl == oldttl)
728                 return;
729         idx = header->node->locknum;
730         if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
731             return;
732         heap = rbtdb->heaps[idx];
733
734         if (newttl < oldttl)
735                 isc_heap_increased(heap, header->heap_index);
736         else
737                 isc_heap_decreased(heap, header->heap_index);
738 }
739
740 /*%
741  * These functions allow the heap code to rank the priority of each
742  * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
743  */
744 static isc_boolean_t
745 ttl_sooner(void *v1, void *v2) {
746         rdatasetheader_t *h1 = v1;
747         rdatasetheader_t *h2 = v2;
748
749         if (h1->rdh_ttl < h2->rdh_ttl)
750                 return (ISC_TRUE);
751         return (ISC_FALSE);
752 }
753
754 static isc_boolean_t
755 resign_sooner(void *v1, void *v2) {
756         rdatasetheader_t *h1 = v1;
757         rdatasetheader_t *h2 = v2;
758
759         if (h1->resign < h2->resign)
760                 return (ISC_TRUE);
761         return (ISC_FALSE);
762 }
763
764 /*%
765  * This function sets the heap index into the header.
766  */
767 static void
768 set_index(void *what, unsigned int index) {
769         rdatasetheader_t *h = what;
770
771         h->heap_index = index;
772 }
773
774 /*%
775  * Work out how many nodes can be deleted in the time between two
776  * requests to the nameserver.  Smooth the resulting number and use it
777  * as a estimate for the number of nodes to be deleted in the next
778  * iteration.
779  */
780 static unsigned int
781 adjust_quantum(unsigned int old, isc_time_t *start) {
782         unsigned int pps = dns_pps;     /* packets per second */
783         unsigned int interval;
784         isc_uint64_t usecs;
785         isc_time_t end;
786         unsigned int new;
787
788         if (pps < 100)
789                 pps = 100;
790         isc_time_now(&end);
791
792         interval = 1000000 / pps;       /* interval in usec */
793         if (interval == 0)
794                 interval = 1;
795         usecs = isc_time_microdiff(&end, start);
796         if (usecs == 0) {
797                 /*
798                  * We were unable to measure the amount of time taken.
799                  * Double the nodes deleted next time.
800                  */
801                 old *= 2;
802                 if (old > 1000)
803                         old = 1000;
804                 return (old);
805         }
806         new = old * interval;
807         new /= (unsigned int)usecs;
808         if (new == 0)
809                 new = 1;
810         else if (new > 1000)
811                 new = 1000;
812
813         /* Smooth */
814         new = (new + old * 3) / 4;
815
816         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
817                       ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);
818
819         return (new);
820 }
821
822 static void
823 free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
824         unsigned int i;
825         isc_ondestroy_t ondest;
826         isc_result_t result;
827         char buf[DNS_NAME_FORMATSIZE];
828         dns_rbt_t **treep;
829         isc_time_t start;
830
831         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
832                 overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);
833
834         REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
835         REQUIRE(rbtdb->future_version == NULL);
836
837         if (rbtdb->current_version != NULL) {
838                 unsigned int refs;
839
840                 isc_refcount_decrement(&rbtdb->current_version->references,
841                                        &refs);
842                 INSIST(refs == 0);
843                 UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
844                 isc_refcount_destroy(&rbtdb->current_version->references);
845                 isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
846                             sizeof(rbtdb_version_t));
847         }
848
849         /*
850          * We assume the number of remaining dead nodes is reasonably small;
851          * the overhead of unlinking all nodes here should be negligible.
852          */
853         for (i = 0; i < rbtdb->node_lock_count; i++) {
854                 dns_rbtnode_t *node;
855
856                 node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
857                 while (node != NULL) {
858                         ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
859                         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
860                 }
861         }
862
863         if (event == NULL)
864                 rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
865
866         for (;;) {
867                 /*
868                  * pick the next tree to (start to) destroy
869                  */
870                 treep = &rbtdb->tree;
871                 if (*treep == NULL) {
872                         treep = &rbtdb->nsec;
873                         if (*treep == NULL) {
874                                 treep = &rbtdb->nsec3;
875                                 /*
876                                  * we're finished after clear cutting
877                                  */
878                                 if (*treep == NULL)
879                                         break;
880                         }
881                 }
882
883                 isc_time_now(&start);
884                 result = dns_rbt_destroy2(treep, rbtdb->quantum);
885                 if (result == ISC_R_QUOTA) {
886                         INSIST(rbtdb->task != NULL);
887                         if (rbtdb->quantum != 0)
888                                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
889                                                                 &start);
890                         if (event == NULL)
891                                 event = isc_event_allocate(rbtdb->common.mctx,
892                                                            NULL,
893                                                          DNS_EVENT_FREESTORAGE,
894                                                            free_rbtdb_callback,
895                                                            rbtdb,
896                                                            sizeof(isc_event_t));
897                         if (event == NULL)
898                                 continue;
899                         isc_task_send(rbtdb->task, &event);
900                         return;
901                 }
902                 INSIST(result == ISC_R_SUCCESS && *treep == NULL);
903         }
904
905         if (event != NULL)
906                 isc_event_free(&event);
907         if (log) {
908                 if (dns_name_dynamic(&rbtdb->common.origin))
909                         dns_name_format(&rbtdb->common.origin, buf,
910                                         sizeof(buf));
911                 else
912                         strcpy(buf, "<UNKNOWN>");
913                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
914                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
915                               "done free_rbtdb(%s)", buf);
916         }
917         if (dns_name_dynamic(&rbtdb->common.origin))
918                 dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
919         for (i = 0; i < rbtdb->node_lock_count; i++) {
920                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
921                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
922         }
923
924         /*
925          * Clean up LRU / re-signing order lists.
926          */
927         if (rbtdb->rdatasets != NULL) {
928                 for (i = 0; i < rbtdb->node_lock_count; i++)
929                         INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
930                 isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
931                             rbtdb->node_lock_count *
932                             sizeof(rdatasetheaderlist_t));
933         }
934         /*
935          * Clean up dead node buckets.
936          */
937         if (rbtdb->deadnodes != NULL) {
938                 for (i = 0; i < rbtdb->node_lock_count; i++)
939                         INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
940                 isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
941                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
942         }
943         /*
944          * Clean up heap objects.
945          */
946         if (rbtdb->heaps != NULL) {
947                 for (i = 0; i < rbtdb->node_lock_count; i++)
948                         isc_heap_destroy(&rbtdb->heaps[i]);
949                 isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
950                             rbtdb->node_lock_count *
951                             sizeof(isc_heap_t *));
952         }
953
954         if (rbtdb->rrsetstats != NULL)
955                 dns_stats_detach(&rbtdb->rrsetstats);
956
957         isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
958                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
959         isc_rwlock_destroy(&rbtdb->tree_lock);
960         isc_refcount_destroy(&rbtdb->references);
961         if (rbtdb->task != NULL)
962                 isc_task_detach(&rbtdb->task);
963
964         RBTDB_DESTROYLOCK(&rbtdb->lock);
965         rbtdb->common.magic = 0;
966         rbtdb->common.impmagic = 0;
967         ondest = rbtdb->common.ondest;
968         isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
969         isc_ondestroy_notify(&ondest, rbtdb);
970 }
971
972 static inline void
973 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
974         isc_boolean_t want_free = ISC_FALSE;
975         unsigned int i;
976         unsigned int inactive = 0;
977
978         /* XXX check for open versions here */
979
980         if (rbtdb->soanode != NULL)
981                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
982         if (rbtdb->nsnode != NULL)
983                 dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
984
985         /*
986          * Even though there are no external direct references, there still
987          * may be nodes in use.
988          */
989         for (i = 0; i < rbtdb->node_lock_count; i++) {
990                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
991                 rbtdb->node_locks[i].exiting = ISC_TRUE;
992                 NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
993                 if (isc_refcount_current(&rbtdb->node_locks[i].references)
994                     == 0) {
995                         inactive++;
996                 }
997         }
998
999         if (inactive != 0) {
1000                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1001                 rbtdb->active -= inactive;
1002                 if (rbtdb->active == 0)
1003                         want_free = ISC_TRUE;
1004                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1005                 if (want_free) {
1006                         char buf[DNS_NAME_FORMATSIZE];
1007                         if (dns_name_dynamic(&rbtdb->common.origin))
1008                                 dns_name_format(&rbtdb->common.origin, buf,
1009                                                 sizeof(buf));
1010                         else
1011                                 strcpy(buf, "<UNKNOWN>");
1012                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1013                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
1014                                       "calling free_rbtdb(%s)", buf);
1015                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
1016                 }
1017         }
1018 }
1019
1020 static void
1021 detach(dns_db_t **dbp) {
1022         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
1023         unsigned int refs;
1024
1025         REQUIRE(VALID_RBTDB(rbtdb));
1026
1027         isc_refcount_decrement(&rbtdb->references, &refs);
1028
1029         if (refs == 0)
1030                 maybe_free_rbtdb(rbtdb);
1031
1032         *dbp = NULL;
1033 }
1034
1035 static void
1036 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
1037         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1038         rbtdb_version_t *version;
1039         unsigned int refs;
1040
1041         REQUIRE(VALID_RBTDB(rbtdb));
1042
1043         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1044         version = rbtdb->current_version;
1045         isc_refcount_increment(&version->references, &refs);
1046         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1047
1048         *versionp = (dns_dbversion_t *)version;
1049 }
1050
1051 static inline rbtdb_version_t *
1052 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
1053                  unsigned int references, isc_boolean_t writer)
1054 {
1055         isc_result_t result;
1056         rbtdb_version_t *version;
1057
1058         version = isc_mem_get(mctx, sizeof(*version));
1059         if (version == NULL)
1060                 return (NULL);
1061         version->serial = serial;
1062         result = isc_refcount_init(&version->references, references);
1063         if (result != ISC_R_SUCCESS) {
1064                 isc_mem_put(mctx, version, sizeof(*version));
1065                 return (NULL);
1066         }
1067         version->writer = writer;
1068         version->commit_ok = ISC_FALSE;
1069         ISC_LIST_INIT(version->changed_list);
1070         ISC_LIST_INIT(version->resigned_list);
1071         ISC_LINK_INIT(version, link);
1072
1073         return (version);
1074 }
1075
1076 static isc_result_t
1077 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
1078         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1079         rbtdb_version_t *version;
1080
1081         REQUIRE(VALID_RBTDB(rbtdb));
1082         REQUIRE(versionp != NULL && *versionp == NULL);
1083         REQUIRE(rbtdb->future_version == NULL);
1084
1085         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1086         RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
1087         version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
1088                                    ISC_TRUE);
1089         if (version != NULL) {
1090                 version->commit_ok = ISC_TRUE;
1091                 version->secure = rbtdb->current_version->secure;
1092                 version->havensec3 = rbtdb->current_version->havensec3;
1093                 if (version->havensec3) {
1094                         version->flags = rbtdb->current_version->flags;
1095                         version->iterations =
1096                                 rbtdb->current_version->iterations;
1097                         version->hash = rbtdb->current_version->hash;
1098                         version->salt_length =
1099                                 rbtdb->current_version->salt_length;
1100                         memcpy(version->salt, rbtdb->current_version->salt,
1101                                version->salt_length);
1102                 } else {
1103                         version->flags = 0;
1104                         version->iterations = 0;
1105                         version->hash = 0;
1106                         version->salt_length = 0;
1107                         memset(version->salt, 0, sizeof(version->salt));
1108                 }
1109                 rbtdb->next_serial++;
1110                 rbtdb->future_version = version;
1111         }
1112         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1113
1114         if (version == NULL)
1115                 return (ISC_R_NOMEMORY);
1116
1117         *versionp = version;
1118
1119         return (ISC_R_SUCCESS);
1120 }
1121
1122 static void
1123 attachversion(dns_db_t *db, dns_dbversion_t *source,
1124               dns_dbversion_t **targetp)
1125 {
1126         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
1127         rbtdb_version_t *rbtversion = source;
1128         unsigned int refs;
1129
1130         REQUIRE(VALID_RBTDB(rbtdb));
1131
1132         isc_refcount_increment(&rbtversion->references, &refs);
1133         INSIST(refs > 1);
1134
1135         *targetp = rbtversion;
1136 }
1137
1138 static rbtdb_changed_t *
1139 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1140             dns_rbtnode_t *node)
1141 {
1142         rbtdb_changed_t *changed;
1143         unsigned int refs;
1144
1145         /*
1146          * Caller must be holding the node lock if its reference must be
1147          * protected by the lock.
1148          */
1149
1150         changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
1151
1152         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
1153
1154         REQUIRE(version->writer);
1155
1156         if (changed != NULL) {
1157                 dns_rbtnode_refincrement(node, &refs);
1158                 INSIST(refs != 0);
1159                 changed->node = node;
1160                 changed->dirty = ISC_FALSE;
1161                 ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
1162         } else
1163                 version->commit_ok = ISC_FALSE;
1164
1165         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
1166
1167         return (changed);
1168 }
1169
1170 static void
1171 free_acachearray(isc_mem_t *mctx, rdatasetheader_t *header,
1172                  acachectl_t *array)
1173 {
1174         unsigned int count;
1175         unsigned int i;
1176         unsigned char *raw;     /* RDATASLAB */
1177
1178         /*
1179          * The caller must be holding the corresponding node lock.
1180          */
1181
1182         if (array == NULL)
1183                 return;
1184
1185         raw = (unsigned char *)header + sizeof(*header);
1186         count = raw[0] * 256 + raw[1];
1187
1188         /*
1189          * Sanity check: since an additional cache entry has a reference to
1190          * the original DB node (in the callback arg), there should be no
1191          * acache entries when the node can be freed.
1192          */
1193         for (i = 0; i < count; i++)
1194                 INSIST(array[i].entry == NULL && array[i].cbarg == NULL);
1195
1196         isc_mem_put(mctx, array, count * sizeof(acachectl_t));
1197 }
1198
1199 static inline void
1200 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
1201
1202         if (dns_name_dynamic(&(*noqname)->name))
1203                 dns_name_free(&(*noqname)->name, mctx);
1204         if ((*noqname)->neg != NULL)
1205                 isc_mem_put(mctx, (*noqname)->neg,
1206                             dns_rdataslab_size((*noqname)->neg, 0));
1207         if ((*noqname)->negsig != NULL)
1208                 isc_mem_put(mctx, (*noqname)->negsig,
1209                             dns_rdataslab_size((*noqname)->negsig, 0));
1210         isc_mem_put(mctx, *noqname, sizeof(**noqname));
1211         *noqname = NULL;
1212 }
1213
1214 static inline void
1215 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h)
1216 {
1217         ISC_LINK_INIT(h, link);
1218         h->heap_index = 0;
1219
1220 #if TRACE_HEADER
1221         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1222                 fprintf(stderr, "initialized header: %p\n", h);
1223 #else
1224         UNUSED(rbtdb);
1225 #endif
1226 }
1227
1228 static inline rdatasetheader_t *
1229 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx)
1230 {
1231         rdatasetheader_t *h;
1232
1233         h = isc_mem_get(mctx, sizeof(*h));
1234         if (h == NULL)
1235                 return (NULL);
1236
1237 #if TRACE_HEADER
1238         if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
1239                 fprintf(stderr, "allocated header: %p\n", h);
1240 #endif
1241         init_rdataset(rbtdb, h);
1242         return (h);
1243 }
1244
1245 static inline void
1246 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset)
1247 {
1248         unsigned int size;
1249         int idx;
1250
1251         if (EXISTS(rdataset) &&
1252             (rdataset->attributes & RDATASET_ATTR_STATCOUNT) != 0) {
1253                 update_rrsetstats(rbtdb, rdataset, ISC_FALSE);
1254         }
1255
1256         idx = rdataset->node->locknum;
1257         if (ISC_LINK_LINKED(rdataset, link)) {
1258                 INSIST(IS_CACHE(rbtdb));
1259                 ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
1260         }
1261         if (rdataset->heap_index != 0)
1262                 isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
1263         rdataset->heap_index = 0;
1264
1265         if (rdataset->noqname != NULL)
1266                 free_noqname(mctx, &rdataset->noqname);
1267         if (rdataset->closest != NULL)
1268                 free_noqname(mctx, &rdataset->closest);
1269
1270         free_acachearray(mctx, rdataset, rdataset->additional_auth);
1271         free_acachearray(mctx, rdataset, rdataset->additional_glue);
1272
1273         if ((rdataset->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
1274                 size = sizeof(*rdataset);
1275         else
1276                 size = dns_rdataslab_size((unsigned char *)rdataset,
1277                                           sizeof(*rdataset));
1278         isc_mem_put(mctx, rdataset, size);
1279 }
1280
1281 static inline void
1282 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
1283         rdatasetheader_t *header, *dcurrent;
1284         isc_boolean_t make_dirty = ISC_FALSE;
1285
1286         /*
1287          * Caller must hold the node lock.
1288          */
1289
1290         /*
1291          * We set the IGNORE attribute on rdatasets with serial number
1292          * 'serial'.  When the reference count goes to zero, these rdatasets
1293          * will be cleaned up; until that time, they will be ignored.
1294          */
1295         for (header = node->data; header != NULL; header = header->next) {
1296                 if (header->serial == serial) {
1297                         header->attributes |= RDATASET_ATTR_IGNORE;
1298                         make_dirty = ISC_TRUE;
1299                 }
1300                 for (dcurrent = header->down;
1301                      dcurrent != NULL;
1302                      dcurrent = dcurrent->down) {
1303                         if (dcurrent->serial == serial) {
1304                                 dcurrent->attributes |= RDATASET_ATTR_IGNORE;
1305                                 make_dirty = ISC_TRUE;
1306                         }
1307                 }
1308         }
1309         if (make_dirty)
1310                 node->dirty = 1;
1311 }
1312
1313 static inline void
1314 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *top)
1315 {
1316         rdatasetheader_t *d, *down_next;
1317
1318         for (d = top->down; d != NULL; d = down_next) {
1319                 down_next = d->down;
1320                 free_rdataset(rbtdb, mctx, d);
1321         }
1322         top->down = NULL;
1323 }
1324
1325 static inline void
1326 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1327         rdatasetheader_t *current, *top_prev, *top_next;
1328         isc_mem_t *mctx = rbtdb->common.mctx;
1329
1330         /*
1331          * Caller must be holding the node lock.
1332          */
1333
1334         top_prev = NULL;
1335         for (current = node->data; current != NULL; current = top_next) {
1336                 top_next = current->next;
1337                 clean_stale_headers(rbtdb, mctx, current);
1338                 /*
1339                  * If current is nonexistent or stale, we can clean it up.
1340                  */
1341                 if ((current->attributes &
1342                      (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0) {
1343                         if (top_prev != NULL)
1344                                 top_prev->next = current->next;
1345                         else
1346                                 node->data = current->next;
1347                         free_rdataset(rbtdb, mctx, current);
1348                 } else
1349                         top_prev = current;
1350         }
1351         node->dirty = 0;
1352 }
1353
1354 static inline void
1355 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1356                 rbtdb_serial_t least_serial)
1357 {
1358         rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
1359         rdatasetheader_t *top_prev, *top_next;
1360         isc_mem_t *mctx = rbtdb->common.mctx;
1361         isc_boolean_t still_dirty = ISC_FALSE;
1362
1363         /*
1364          * Caller must be holding the node lock.
1365          */
1366         REQUIRE(least_serial != 0);
1367
1368         top_prev = NULL;
1369         for (current = node->data; current != NULL; current = top_next) {
1370                 top_next = current->next;
1371
1372                 /*
1373                  * First, we clean up any instances of multiple rdatasets
1374                  * with the same serial number, or that have the IGNORE
1375                  * attribute.
1376                  */
1377                 dparent = current;
1378                 for (dcurrent = current->down;
1379                      dcurrent != NULL;
1380                      dcurrent = down_next) {
1381                         down_next = dcurrent->down;
1382                         INSIST(dcurrent->serial <= dparent->serial);
1383                         if (dcurrent->serial == dparent->serial ||
1384                             IGNORE(dcurrent)) {
1385                                 if (down_next != NULL)
1386                                         down_next->next = dparent;
1387                                 dparent->down = down_next;
1388                                 free_rdataset(rbtdb, mctx, dcurrent);
1389                         } else
1390                                 dparent = dcurrent;
1391                 }
1392
1393                 /*
1394                  * We've now eliminated all IGNORE datasets with the possible
1395                  * exception of current, which we now check.
1396                  */
1397                 if (IGNORE(current)) {
1398                         down_next = current->down;
1399                         if (down_next == NULL) {
1400                                 if (top_prev != NULL)
1401                                         top_prev->next = current->next;
1402                                 else
1403                                         node->data = current->next;
1404                                 free_rdataset(rbtdb, mctx, current);
1405                                 /*
1406                                  * current no longer exists, so we can
1407                                  * just continue with the loop.
1408                                  */
1409                                 continue;
1410                         } else {
1411                                 /*
1412                                  * Pull up current->down, making it the new
1413                                  * current.
1414                                  */
1415                                 if (top_prev != NULL)
1416                                         top_prev->next = down_next;
1417                                 else
1418                                         node->data = down_next;
1419                                 down_next->next = top_next;
1420                                 free_rdataset(rbtdb, mctx, current);
1421                                 current = down_next;
1422                         }
1423                 }
1424
1425                 /*
1426                  * We now try to find the first down node less than the
1427                  * least serial.
1428                  */
1429                 dparent = current;
1430                 for (dcurrent = current->down;
1431                      dcurrent != NULL;
1432                      dcurrent = down_next) {
1433                         down_next = dcurrent->down;
1434                         if (dcurrent->serial < least_serial)
1435                                 break;
1436                         dparent = dcurrent;
1437                 }
1438
1439                 /*
1440                  * If there is a such an rdataset, delete it and any older
1441                  * versions.
1442                  */
1443                 if (dcurrent != NULL) {
1444                         do {
1445                                 down_next = dcurrent->down;
1446                                 INSIST(dcurrent->serial <= least_serial);
1447                                 free_rdataset(rbtdb, mctx, dcurrent);
1448                                 dcurrent = down_next;
1449                         } while (dcurrent != NULL);
1450                         dparent->down = NULL;
1451                 }
1452
1453                 /*
1454                  * Note.  The serial number of 'current' might be less than
1455                  * least_serial too, but we cannot delete it because it is
1456                  * the most recent version, unless it is a NONEXISTENT
1457                  * rdataset.
1458                  */
1459                 if (current->down != NULL) {
1460                         still_dirty = ISC_TRUE;
1461                         top_prev = current;
1462                 } else {
1463                         /*
1464                          * If this is a NONEXISTENT rdataset, we can delete it.
1465                          */
1466                         if (NONEXISTENT(current)) {
1467                                 if (top_prev != NULL)
1468                                         top_prev->next = current->next;
1469                                 else
1470                                         node->data = current->next;
1471                                 free_rdataset(rbtdb, mctx, current);
1472                         } else
1473                                 top_prev = current;
1474                 }
1475         }
1476         if (!still_dirty)
1477                 node->dirty = 0;
1478 }
1479
1480 static void
1481 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node)
1482 {
1483         dns_rbtnode_t *nsecnode;
1484         dns_fixedname_t fname;
1485         dns_name_t *name;
1486         isc_result_t result = ISC_R_UNEXPECTED;
1487
1488         INSIST(!ISC_LINK_LINKED(node, deadlink));
1489
1490         switch (node->nsec) {
1491         case DNS_RBT_NSEC_NORMAL:
1492                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1493                 break;
1494         case DNS_RBT_NSEC_HAS_NSEC:
1495                 dns_fixedname_init(&fname);
1496                 name = dns_fixedname_name(&fname);
1497                 dns_rbt_fullnamefromnode(node, name);
1498                 /*
1499                  * Delete the corresponding node from the auxiliary NSEC
1500                  * tree before deleting from the main tree.
1501                  */
1502                 nsecnode = NULL;
1503                 result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
1504                                           NULL, DNS_RBTFIND_EMPTYDATA,
1505                                           NULL, NULL);
1506                 if (result != ISC_R_SUCCESS) {
1507                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
1508                                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
1509                                       "delete_node: "
1510                                       "dns_rbt_findnode(nsec): %s",
1511                                       isc_result_totext(result));
1512                 } else {
1513                         result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
1514                                                     ISC_FALSE);
1515                         if (result != ISC_R_SUCCESS) {
1516                                 isc_log_write(dns_lctx,
1517                                               DNS_LOGCATEGORY_DATABASE,
1518                                               DNS_LOGMODULE_CACHE,
1519                                               ISC_LOG_WARNING,
1520                                               "delete_nsecnode(): "
1521                                               "dns_rbt_deletenode(nsecnode): %s",
1522                                               isc_result_totext(result));
1523                         }
1524                 }
1525                 result = dns_rbt_deletenode(rbtdb->tree, node, ISC_FALSE);
1526                 break;
1527         case DNS_RBT_NSEC_NSEC:
1528                 result = dns_rbt_deletenode(rbtdb->nsec, node, ISC_FALSE);
1529                 break;
1530         case DNS_RBT_NSEC_NSEC3:
1531                 result = dns_rbt_deletenode(rbtdb->nsec3, node, ISC_FALSE);
1532                 break;
1533         }
1534         if (result != ISC_R_SUCCESS) {
1535                 isc_log_write(dns_lctx,
1536                               DNS_LOGCATEGORY_DATABASE,
1537                               DNS_LOGMODULE_CACHE,
1538                               ISC_LOG_WARNING,
1539                               "delete_nsecnode(): "
1540                               "dns_rbt_deletenode: %s",
1541                               isc_result_totext(result));
1542         }
1543 }
1544
1545 /*%
1546  * Reclaim nodes parked on a bucket's dead-node list.  Such nodes have no
1547  * references and no data; they were left in the tree when their data was
1548  * deleted because we could not, or chose not to, wait for the tree write
1549  * lock at that point.  At most 10 nodes are reclaimed per call.
1550  *
1551  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
1552  */
1553 static void
1554 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
1555         dns_rbtnode_t *node;
1556         int budget;             /* XXXJT: should be adjustable */
1557
1558         for (budget = 10; budget > 0; budget--) {
1559                 node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
1560                 if (node == NULL)
1561                         break;
1562
1563                 ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
1564
1565                 /*
1566                  * With the tree write lock held, no other thread should
1567                  * have been able to pick up a reference to this node.
1568                  */
1569                 INSIST(dns_rbtnode_refcurrent(node) == 0 &&
1570                        node->data == NULL);
1571
1572                 delete_node(rbtdb, node);
1573         }
1574 }
1575
1576 /*
1577  * Add a reference to 'node' (and, for its first one, to its lock bucket).
1578  * Caller must hold the node lock if the reference must be protected by it.
1579  */
1580 static inline void
1581 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
1582         unsigned int noderefs, lockrefs;
1583
1584         dns_rbtnode_refincrement0(node, &noderefs);
1585         if (noderefs == 1) {    /* first reference: count it on the bucket */
1586                 isc_refcount_increment0(
1587                         &rbtdb->node_locks[node->locknum].references,
1588                         &lockrefs);
1589                 INSIST(lockrefs != 0);
1590         }
1591         INSIST(noderefs != 0);
1592 }
1593
1594 /*
1595  * This function is assumed to be called when a node is newly referenced
1596  * and can be in the deadnode list.  In that case the node must be retrieved
1597  * from the list because it is going to be used.  In addition, if the caller
1598  * happens to hold a write lock on the tree, it's a good chance to purge dead
1599  * nodes.
1600  * Note: while a new reference is gained in multiple places, there are only very
1601  * few cases where the node can be in the deadnode list (only empty nodes can
1602  * have been added to the list).
      *
      * 'treelocktype' is the type of lock the caller holds on the tree.  Dead
      * nodes are purged (via cleanup_dead_nodes()) only when it is
      * isc_rwlocktype_write.
      *
      * The function acquires the "strong" lock and the read/write lock of the
      * node's lock bucket itself and releases both before returning.
1603  */
1604 static inline void
1605 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1606                 isc_rwlocktype_t treelocktype)
1607 {
1608         isc_boolean_t need_relock = ISC_FALSE;
1609
1610         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
1611         new_reference(rbtdb, node);
1612
             /*
              * Look at the dead-node list under the read lock first.  The
              * write lock is needed only if this node is on the list, or if
              * the list is non-empty and the caller's tree write lock allows
              * us to purge it.
              */
1613         NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1614                       isc_rwlocktype_read);
1615         if (ISC_LINK_LINKED(node, deadlink))
1616                 need_relock = ISC_TRUE;
1617         else if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
1618                  treelocktype == isc_rwlocktype_write)
1619                 need_relock = ISC_TRUE;
1620         NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1621                         isc_rwlocktype_read);
1622         if (need_relock) {
                     /*
                      * The read lock was released above, so the link state
                      * is tested again once the write lock is held.
                      */
1623                 NODE_WEAKLOCK(&rbtdb->node_locks[node->locknum].lock,
1624                               isc_rwlocktype_write);
1625                 if (ISC_LINK_LINKED(node, deadlink))
1626                         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum],
1627                                         node, deadlink);
1628                 if (treelocktype == isc_rwlocktype_write)
1629                         cleanup_dead_nodes(rbtdb, node->locknum);
1630                 NODE_WEAKUNLOCK(&rbtdb->node_locks[node->locknum].lock,
1631                                 isc_rwlocktype_write);
1632         }
1633
1634         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
1635 }
1636
1637 /*
1638  * Caller must be holding the node lock; either the "strong", read or write
1639  * lock.  Note that the lock must be held even when node references are
1640  * atomically modified; in that case the decrement operation itself does not
1641  * have to be protected, but we must avoid a race condition where multiple
1642  * threads are decreasing the reference to zero simultaneously and at least
1643  * one of them is going to free the node.
1644  * This function returns ISC_TRUE if and only if the node reference decreases
1645  * to zero.
      *
      * Exceptions to the above: ISC_FALSE is returned when the count reached
      * zero but a pruning event was dispatched (the event holds a new
      * reference to the node), or when a reference was found on the node at
      * the final check.
      *
      * 'least_serial' is passed to clean_zone_node() when a dirty zone node
      *      is cleaned; 0 means the caller does not know it, and it is then
      *      read from rbtdb->least_serial under the database read lock.
      * 'nlock' is the type of node lock held by the caller.  A read lock is
      *      exchanged for a write lock on the slow path and turned back into
      *      a read lock before returning.
      * 'tlock' is the type of tree lock held by the caller (none, read or
      *      write).  Unless it is a write lock, a non-blocking attempt is made
      *      to obtain one; whatever was obtained is given back before
      *      returning.
      * 'pruning' is ISC_TRUE when the call is made from prune_tree(); it
      *      prevents another pruning event from being dispatched.
1646  */
1647 static isc_boolean_t
1648 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
1649                     rbtdb_serial_t least_serial,
1650                     isc_rwlocktype_t nlock, isc_rwlocktype_t tlock,
1651                     isc_boolean_t pruning)
1652 {
1653         isc_result_t result;
1654         isc_boolean_t write_locked;
1655         rbtdb_nodelock_t *nodelock;
1656         unsigned int refs, nrefs;
1657         int bucket = node->locknum;
1658         isc_boolean_t no_reference;
1659
1660         nodelock = &rbtdb->node_locks[bucket];
1661
1662         /* Handle easy and typical case first. */
             /*
              * A node that is not dirty and still has data or a subtree is
              * neither cleaned nor deleted here, so only the counters are
              * updated.  The bucket's count drops when the node's count
              * reaches zero.
              */
1663         if (!node->dirty && (node->data != NULL || node->down != NULL)) {
1664                 dns_rbtnode_refdecrement(node, &nrefs);
1665                 INSIST((int)nrefs >= 0);
1666                 if (nrefs == 0) {
1667                         isc_refcount_decrement(&nodelock->references, &refs);
1668                         INSIST((int)refs >= 0);
1669                 }
1670                 return ((nrefs == 0) ? ISC_TRUE : ISC_FALSE);
1671         }
1672
1673         /* Upgrade the lock? */
             /* Note that the read lock is dropped before the write lock is taken. */
1674         if (nlock == isc_rwlocktype_read) {
1675                 NODE_WEAKUNLOCK(&nodelock->lock, isc_rwlocktype_read);
1676                 NODE_WEAKLOCK(&nodelock->lock, isc_rwlocktype_write);
1677         }
1678         dns_rbtnode_refdecrement(node, &nrefs);
1679         INSIST((int)nrefs >= 0);
1680         if (nrefs > 0) {
1681                 /* Restore the lock? */
1682                 if (nlock == isc_rwlocktype_read)
1683                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1684                 return (ISC_FALSE);
1685         }
1686
             /*
              * Last reference gone.  A dirty node is cleaned first, with the
              * cache or the zone variant of the cleaner.
              */
1687         if (node->dirty && dns_rbtnode_refcurrent(node) == 0) {
1688                 if (IS_CACHE(rbtdb))
1689                         clean_cache_node(rbtdb, node);
1690                 else {
1691                         if (least_serial == 0) {
1692                                 /*
1693                                  * Caller doesn't know the least serial.
1694                                  * Get it.
1695                                  */
1696                                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
1697                                 least_serial = rbtdb->least_serial;
1698                                 RBTDB_UNLOCK(&rbtdb->lock,
1699                                              isc_rwlocktype_read);
1700                         }
1701                         clean_zone_node(rbtdb, node, least_serial);
1702                 }
1703         }
1704
             /* The node no longer counts as referenced within its lock bucket. */
1705         isc_refcount_decrement(&nodelock->references, &refs);
1706         INSIST((int)refs >= 0);
1707
1708         /*
1709          * XXXDCL should this only be done for cache zones?
1710          */
             /* A node that still has data or a subtree stays in the tree. */
1711         if (node->data != NULL || node->down != NULL) {
1712                 /* Restore the lock? */
1713                 if (nlock == isc_rwlocktype_read)
1714                         NODE_WEAKDOWNGRADE(&nodelock->lock);
1715                 return (ISC_TRUE);
1716         }
1717
1718         /*
1719          * Attempt to switch to a write lock on the tree.  If this fails,
1720          * we will add this node to a linked list of nodes in this locking
1721          * bucket which we will free later.
1722          */
1723         if (tlock != isc_rwlocktype_write) {
1724                 /*
1725                  * Locking hierarchy notwithstanding, we don't need to free
1726                  * the node lock before acquiring the tree write lock because
1727                  * we only do a trylock.
1728                  */
1729                 if (tlock == isc_rwlocktype_read)
1730                         result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
1731                 else
1732                         result = isc_rwlock_trylock(&rbtdb->tree_lock,
1733                                                     isc_rwlocktype_write);
1734                 RUNTIME_CHECK(result == ISC_R_SUCCESS ||
1735                               result == ISC_R_LOCKBUSY);
1736
1737                 write_locked = ISC_TF(result == ISC_R_SUCCESS);
1738         } else
1739                 write_locked = ISC_TRUE;
1740
1741         no_reference = ISC_TRUE;
1742         if (write_locked && dns_rbtnode_refcurrent(node) == 0) {
1743                 /*
1744                  * We can now delete the node if the reference counter is
1745                  * zero.  This should be typically the case, but a different
1746                  * thread may still gain a (new) reference just before the
1747                  * current thread locks the tree (e.g., in findnode()).
1748                  */
1749
1750                 /*
1751                  * If this node is the only one in the level it's in, deleting
1752                  * this node may recursively make its parent the only node in
1753                  * the parent level; if so, and if no one is currently using
1754                  * the parent node, this is almost the only opportunity to
1755                  * clean it up.  But the recursive cleanup is not that trivial
1756                  * since the child and parent may be in different lock buckets,
1757                  * which would cause a lock order reversal problem.  To avoid
1758                  * the trouble, we'll dispatch a separate event for batch
1759                  * cleaning.  We need to check whether we're deleting the node
1760                  * as a result of pruning to avoid infinite dispatching.
1761                  * Note: pruning happens only when a task has been set for the
1762                  * rbtdb.  If the user of the rbtdb chooses not to set a task,
1763                  * it's their responsibility to purge stale leaves (e.g. by
1764                  * periodic walk-through).
1765                  */
1766                 if (!pruning && node->parent != NULL &&
1767                     node->parent->down == node && node->left == NULL &&
1768                     node->right == NULL && rbtdb->task != NULL) {
1769                         isc_event_t *ev;
1770                         dns_db_t *db;
1771
1772                         ev = isc_event_allocate(rbtdb->common.mctx, NULL,
1773                                                 DNS_EVENT_RBTPRUNE,
1774                                                 prune_tree, node,
1775                                                 sizeof(isc_event_t));
1776                         if (ev != NULL) {
                                     /*
                                      * The event keeps both the node and the
                                      * database alive: a node reference is
                                      * taken and the database is attached;
                                      * prune_tree() gives both back.
                                      */
1777                                 new_reference(rbtdb, node);
1778                                 db = NULL;
1779                                 attach((dns_db_t *)rbtdb, &db);
1780                                 ev->ev_sender = db;
1781                                 isc_task_send(rbtdb->task, &ev);
1782                                 no_reference = ISC_FALSE;
1783                         } else {
1784                                 /*
1785                                  * XXX: this is a weird situation.  We could
1786                                  * ignore this error case, but then the stale
1787                                  * node is unlikely to be purged except via a
1788                                  * rare condition such as manual cleanup.  So
1789                                  * we queue it in the deadnodes list, hoping
1790                                  * the memory shortage is temporary and the node
1791                                  * will be deleted later.
1792                                  */
1793                                 isc_log_write(dns_lctx,
1794                                               DNS_LOGCATEGORY_DATABASE,
1795                                               DNS_LOGMODULE_CACHE,
1796                                               ISC_LOG_INFO,
1797                                               "decrement_reference: failed to "
1798                                               "allocate pruning event");
1799                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1800                                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
1801                                                 deadlink);
1802                         }
1803                 } else {
1804                         if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
1805                                 char printname[DNS_NAME_FORMATSIZE];
1806
1807                                 isc_log_write(dns_lctx,
1808                                               DNS_LOGCATEGORY_DATABASE,
1809                                               DNS_LOGMODULE_CACHE,
1810                                               ISC_LOG_DEBUG(1),
1811                                               "decrement_reference: "
1812                                               "delete from rbt: %p %s",
1813                                               node,
1814                                               dns_rbt_formatnodename(node,
1815                                                         printname,
1816                                                         sizeof(printname)));
1817                         }
1818
1819                         delete_node(rbtdb, node);
1820                 }
1821         } else if (dns_rbtnode_refcurrent(node) == 0) {
                     /*
                      * No tree write lock: park the node on the bucket's
                      * dead-node list so that it can be deleted later.
                      */
1822                 INSIST(!ISC_LINK_LINKED(node, deadlink));
1823                 ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node, deadlink);
1824         } else
                     /* The node has a reference again; leave it alone. */
1825                 no_reference = ISC_FALSE;
1826
1827         /* Restore the lock? */
1828         if (nlock == isc_rwlocktype_read)
1829                 NODE_WEAKDOWNGRADE(&nodelock->lock);
1830
1831         /*
1832          * Relock a read lock, or unlock the write lock if no lock was held.
1833          */
1834         if (tlock == isc_rwlocktype_none)
1835                 if (write_locked)
1836                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1837
1838         if (tlock == isc_rwlocktype_read)
1839                 if (write_locked)
1840                         isc_rwlock_downgrade(&rbtdb->tree_lock);
1841
1842         return (no_reference);
1843 }
1844
1845 /*
1846  * Prune the tree by recursively cleaning-up single leaves.  In the worst
1847  * case, the number of iterations is the number of tree levels, which is at
1848  * most the maximum number of domain name labels, i.e., 127.  In practice, this
1849  * should be much smaller (only a few times), and even the worst case would be
1850  * acceptable for a single event.
      *
      * This is the action of the DNS_EVENT_RBTPRUNE event dispatched by
      * decrement_reference().  The event's sender is the database (attached
      * when the event was created) and its argument is the node to start
      * from, on which the event holds a reference.  Both are released here.
1851  */
1852 static void
1853 prune_tree(isc_task_t *task, isc_event_t *event) {
1854         dns_rbtdb_t *rbtdb = event->ev_sender;
1855         dns_rbtnode_t *node = event->ev_arg;
1856         dns_rbtnode_t *parent;
1857         unsigned int locknum;
1858
1859         UNUSED(task);
1860
1861         isc_event_free(&event);
1862
1863         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1864         locknum = node->locknum;
1865         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1866         do {
                     /*
                      * Fetch the parent before dropping the reference, since
                      * decrement_reference() may delete 'node'.  It is told
                      * that both write locks are held and that we are pruning,
                      * so it does not dispatch another pruning event.
                      */
1867                 parent = node->parent;
1868                 decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
1869                                     isc_rwlocktype_write, ISC_TRUE);
1870
1871                 if (parent != NULL && parent->down == NULL) {
1872                         /*
1873                          * node was the only down child of the parent and has
1874                          * just been removed.  We'll then need to examine the
1875                          * parent.  Keep the lock if possible; otherwise,
1876                          * release the old lock and acquire one for the parent.
1877                          */
1878                         if (parent->locknum != locknum) {
1879                                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
1880                                             isc_rwlocktype_write);
1881                                 locknum = parent->locknum;
1882                                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
1883                                           isc_rwlocktype_write);
1884                         }
1885
1886                         /*
1887                          * We need to gain a reference to the node before
1888                          * decrementing it in the next iteration.  In addition,
1889                          * if the node is in the dead-nodes list, extract it
1890                          * from the list beforehand as we do in
1891                          * reactivate_node().
1892                          */
1893                         new_reference(rbtdb, parent);
1894                         if (ISC_LINK_LINKED(parent, deadlink)) {
1895                                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
1896                                                 parent, deadlink);
1897                         }
1898                 } else
                             /* Nothing more to prune; this ends the loop. */
1899                         parent = NULL;
1900
1901                 node = parent;
1902         } while (node != NULL);
1903         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
1904         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
1905
             /* Release the database reference that was carried by the event. */
1906         detach((dns_db_t **)&rbtdb);
1907 }
1908
1909 static inline void
1910 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
1911                    rbtdb_changedlist_t *cleanup_list)
1912 {
1913         /*
1914          * Make 'version' the least open version and pass its changed
1915          * list to the caller.  Caller must hold the database lock.
1916          */
1917         *cleanup_list = version->changed_list;
1918         ISC_LIST_INIT(version->changed_list);
1919         rbtdb->least_serial = version->serial;
1920 }
1921
1922 static inline void
1923 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
1924         rbtdb_changed_t *changed, *successor;
1925
1926         /*
1927          * Move every changed record that is not dirty from the
1928          * version's changed list to the caller's cleanup list.
1929          *
1930          * A dirty record means an update created multiple versions
1931          * of a given rdataset.  Those records stay on the list until
1932          * this version is the least open version, at which point it
1933          * is safe to get rid of any older versions.
1934          *
1935          * A record that isn't dirty is not needed anymore, since
1936          * we're committing and not rolling back.
1937          *
1938          * The caller must be holding the database lock.
1939          */
1940         changed = HEAD(version->changed_list);
1941         while (changed != NULL) {
1942                 /* Look up the successor before relinking 'changed'. */
1943                 successor = NEXT(changed, link);
1944
1945                 if (!changed->dirty) {
1946                         UNLINK(version->changed_list, changed, link);
1947                         APPEND(*cleanup_list, changed, link);
1948                 }
1949                 changed = successor;
1950         }
1951 }
1952
1953 static void
1954 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
             /*
              * Work out whether 'version' of the zone is secure and store the
              * verdict in version->secure.
              *
              * The version is marked dns_db_secure when the DNSKEY rdataset at
              * 'origin' contains at least one zone key and, in addition,
              * either the NSEC lookup at 'origin' also returned an associated
              * 'signsecset', or setnsec3parameters() set version->havensec3.
              * In every other case it is marked dns_db_insecure; without a
              * zone key version->havensec3 is cleared as well.
              *
              * When BIND9 is not defined the function does nothing.
              */
1955 #ifndef BIND9
1956         UNUSED(db);
1957         UNUSED(version);
1958         UNUSED(origin);
1959
1960         return;
1961 #else
1962         dns_rdataset_t keyset;
1963         dns_rdataset_t nsecset, signsecset;
1964         isc_boolean_t haszonekey = ISC_FALSE;
1965         isc_boolean_t hasnsec = ISC_FALSE;
1966         isc_result_t result;
1967
1968         dns_rdataset_init(&keyset);
1969         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
1970                                      0, 0, &keyset, NULL);
1971         if (result == ISC_R_SUCCESS) {
1972                 result = dns_rdataset_first(&keyset);
1973                 while (result == ISC_R_SUCCESS) {
                             /*
                              * Use a fresh rdata for every key.  An rdata
                              * that has already been filled in must not be
                              * handed to dns_rdataset_current() again
                              * without being reset, which used to happen
                              * whenever the first DNSKEY was not a zone key.
                              */
1974                         dns_rdata_t keyrdata = DNS_RDATA_INIT;
1975                         dns_rdataset_current(&keyset, &keyrdata);
1976                         if (dns_zonekey_iszonekey(&keyrdata)) {
1977                                 haszonekey = ISC_TRUE;
1978                                 break;
1979                         }
1980                         result = dns_rdataset_next(&keyset);
1981                 }
1982                 dns_rdataset_disassociate(&keyset);
1983         }
1984         if (!haszonekey) {
1985                 version->secure = dns_db_insecure;
1986                 version->havensec3 = ISC_FALSE;
1987                 return;
1988         }
1989
1990         dns_rdataset_init(&nsecset);
1991         dns_rdataset_init(&signsecset);
1992         result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec,
1993                                      0, 0, &nsecset, &signsecset);
1994         if (result == ISC_R_SUCCESS) {
1995                 if (dns_rdataset_isassociated(&signsecset)) {
1996                         hasnsec = ISC_TRUE;
1997                         dns_rdataset_disassociate(&signsecset);
1998                 }
1999                 dns_rdataset_disassociate(&nsecset);
2000         }
2001
2002         setnsec3parameters(db, version);
2003
2004         /*
2005          * Do we have a valid NSEC/NSEC3 chain?
2006          */
2007         if (version->havensec3 || hasnsec)
2008                 version->secure = dns_db_secure;
2009         else
2010                 version->secure = dns_db_insecure;
2011 #endif
2012 }
2013
2014 /*%<
2015  * Walk the origin node looking for NSEC3PARAM records.
2016  * Cache the nsec3 parameters.
      *
      * The parameters (hash, salt, salt_length, iterations, flags) of the
      * chosen NSEC3PARAM record are copied into 'version' and
      * version->havensec3 is set to ISC_TRUE; when no acceptable record is
      * found version->havensec3 is left ISC_FALSE.  Records with non-zero
      * flags, or with a hash that is neither DNS_NSEC3_UNKNOWNALG nor
      * accepted by dns_nsec3_supportedhash(), are skipped.
      *
      * The tree lock and the origin node's lock are held for reading while
      * the node is examined.
2017  */
2018 #ifdef BIND9
2019 static void
2020 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
2021         dns_rbtnode_t *node;
2022         dns_rdata_nsec3param_t nsec3param;
2023         dns_rdata_t rdata = DNS_RDATA_INIT;
2024         isc_region_t region;
2025         isc_result_t result;
2026         rdatasetheader_t *header, *header_next;
2027         unsigned char *raw;             /* RDATASLAB */
2028         unsigned int count, length;
2029         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2030
2031         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2032         version->havensec3 = ISC_FALSE;
2033         node = rbtdb->origin_node;
2034         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2035                   isc_rwlocktype_read);
2036         for (header = node->data;
2037              header != NULL;
2038              header = header_next) {
2039                 header_next = header->next;
                     /*
                      * Follow the 'down' chain to the first header that is
                      * not newer than 'version' and is not marked IGNORE.
                      * If that header is marked NONEXISTENT, there is
                      * nothing to look at.
                      */
2040                 do {
2041                         if (header->serial <= version->serial &&
2042                             !IGNORE(header)) {
2043                                 if (NONEXISTENT(header))
2044                                         header = NULL;
2045                                 break;
2046                         } else
2047                                 header = header->down;
2048                 } while (header != NULL);
2049
2050                 if (header != NULL &&
2051                     (header->type == dns_rdatatype_nsec3param)) {
2052                         /*
2053                          * Find A NSEC3PARAM with a supported algorithm.
2054                          */
                             /*
                              * The raw data follows the header.  As read
                              * here it starts with a two-byte record count,
                              * and every record is preceded by a two-byte
                              * length; with DNS_RDATASET_FIXED a further
                              * count * 4 bytes after the count and two more
                              * bytes per record are skipped.
                              */
2055                         raw = (unsigned char *)header + sizeof(*header);
2056                         count = raw[0] * 256 + raw[1]; /* count */
2057 #if DNS_RDATASET_FIXED
2058                         raw += count * 4 + 2;
2059 #else
2060                         raw += 2;
2061 #endif
2062                         while (count-- > 0U) {
2063                                 length = raw[0] * 256 + raw[1];
2064 #if DNS_RDATASET_FIXED
2065                                 raw += 4;
2066 #else
2067                                 raw += 2;
2068 #endif
2069                                 region.base = raw;
2070                                 region.length = length;
2071                                 raw += length;
2072                                 dns_rdata_fromregion(&rdata,
2073                                                      rbtdb->common.rdclass,
2074                                                      dns_rdatatype_nsec3param,
2075                                                      &region);
2076                                 result = dns_rdata_tostruct(&rdata,
2077                                                             &nsec3param,
2078                                                             NULL);
2079                                 INSIST(result == ISC_R_SUCCESS);
                                     /* Reset so 'rdata' can be reused next time round. */
2080                                 dns_rdata_reset(&rdata);
2081
2082                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
2083                                     !dns_nsec3_supportedhash(nsec3param.hash))
2084                                         continue;
2085
2086                                 if (nsec3param.flags != 0)
2087                                         continue;
2088
2089                                 memcpy(version->salt, nsec3param.salt,
2090                                        nsec3param.salt_length);
2091                                 version->hash = nsec3param.hash;
2092                                 version->salt_length = nsec3param.salt_length;
2093                                 version->iterations = nsec3param.iterations;
2094                                 version->flags = nsec3param.flags;
2095                                 version->havensec3 = ISC_TRUE;
2096                                 /*
2097                                  * Look for a better algorithm than the
2098                                  * unknown test algorithm.
2099                                  */
2100                                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG)
2101                                         goto unlock;
2102                         }
2103                 }
2104         }
2105  unlock:
2106         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2107                     isc_rwlocktype_read);
2108         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
2109 }
2110 #endif
2111
2112 static void
2113 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
2114         dns_rbtdb_t *rbtdb = event->ev_arg;
2115         isc_boolean_t again = ISC_FALSE;
2116         unsigned int locknum;
2117         unsigned int refs;
2118
             /*
              * Task event action: purge the dead-node list of every lock
              * bucket of the database passed in event->ev_arg.
              *
              * cleanup_dead_nodes() removes a bounded number of nodes per
              * call, so the event is sent to 'task' again for as long as any
              * list is left non-empty.  Once all lists are empty the event is
              * freed and one reference on rbtdb->references is dropped
              * (presumably the one taken when the event was first sent; that
              * code is not visible here), freeing the database if it was the
              * last one.
              *
              * cleanup_dead_nodes() deletes nodes from the tree and requires
              * its caller to hold the tree write lock together with the
              * bucket's node write lock, so it is the tree lock, not the
              * database lock, that has to be held around the sweep.
              */
2119         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2120         for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
2121                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
2122                           isc_rwlocktype_write);
2123                 cleanup_dead_nodes(rbtdb, locknum);
2124                 if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL)
2125                         again = ISC_TRUE;
2126                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
2127                             isc_rwlocktype_write);
2128         }
2129         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2130         if (again)
2131                 isc_task_send(task, &event);
2132         else {
2133                 isc_event_free(&event);
2134                 isc_refcount_decrement(&rbtdb->references, &refs);
2135                 if (refs == 0)
2136                         maybe_free_rbtdb(rbtdb);
2137         }
2138 }
2139
2140 static void
2141 closeversion(dns_db_t *db, dns_dbversion_t **versionp, isc_boolean_t commit) {
2142         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2143         rbtdb_version_t *version, *cleanup_version, *least_greater;
2144         isc_boolean_t rollback = ISC_FALSE;
2145         rbtdb_changedlist_t cleanup_list;
2146         rdatasetheaderlist_t resigned_list;
2147         rbtdb_changed_t *changed, *next_changed;
2148         rbtdb_serial_t serial, least_serial;
2149         dns_rbtnode_t *rbtnode;
2150         unsigned int refs;
2151         rdatasetheader_t *header;
2152         isc_boolean_t writer;
2153
2154         REQUIRE(VALID_RBTDB(rbtdb));
2155         version = (rbtdb_version_t *)*versionp;
2156
2157         cleanup_version = NULL;
2158         ISC_LIST_INIT(cleanup_list);
2159         ISC_LIST_INIT(resigned_list);
2160
2161         isc_refcount_decrement(&version->references, &refs);
2162         if (refs > 0) {         /* typical and easy case first */
2163                 if (commit) {
2164                         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
2165                         INSIST(!version->writer);
2166                         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
2167                 }
2168                 goto end;
2169         }
2170
2171         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
2172         serial = version->serial;
2173         writer = version->writer;
2174         if (version->writer) {
2175                 if (commit) {
2176                         unsigned cur_ref;
2177                         rbtdb_version_t *cur_version;
2178
2179                         INSIST(version->commit_ok);
2180                         INSIST(version == rbtdb->future_version);
2181                         /*
2182                          * The current version is going to be replaced.
2183                          * Release the (likely last) reference to it from the
2184                          * DB itself and unlink it from the open list.
2185                          */
2186                         cur_version = rbtdb->current_version;
2187                         isc_refcount_decrement(&cur_version->references,
2188                                                &cur_ref);
2189                         if (cur_ref == 0) {
2190                                 if (cur_version->serial == rbtdb->least_serial)
2191                                         INSIST(EMPTY(cur_version->changed_list));
2192                                 UNLINK(rbtdb->open_versions,
2193                                        cur_version, link);
2194                         }
2195                         if (EMPTY(rbtdb->open_versions)) {
2196                                 /*
2197                                  * We're going to become the least open
2198                                  * version.
2199                                  */
2200                                 make_least_version(rbtdb, version,
2201                                                    &cleanup_list);
2202                         } else {
2203                                 /*
2204                                  * Some other open version is the
2205                                  * least version.  We can't cleanup
2206                                  * records that were changed in this
2207                                  * version because the older versions
2208                                  * may still be in use by an open
2209                                  * version.
2210                                  *
2211                                  * We can, however, discard the
2212                                  * changed records for things that
2213                                  * we've added that didn't exist in
2214                                  * prior versions.
2215                                  */
2216                                 cleanup_nondirty(version, &cleanup_list);
2217                         }
2218                         /*
2219                          * If the (soon to be former) current version
2220                          * isn't being used by anyone, we can clean
2221                          * it up.
2222                          */
2223                         if (cur_ref == 0) {
2224                                 cleanup_version = cur_version;
2225                                 APPENDLIST(version->changed_list,
2226                                            cleanup_version->changed_list,
2227                                            link);
2228                         }
2229                         /*
2230                          * Become the current version.
2231                          */
2232                         version->writer = ISC_FALSE;
2233                         rbtdb->current_version = version;
2234                         rbtdb->current_serial = version->serial;
2235                         rbtdb->future_version = NULL;
2236
2237                         /*
2238                          * Keep the current version in the open list, and
2239                          * gain a reference for the DB itself (see the DB
2240                          * creation function below).  This must be the only
2241                          * case where we need to increment the counter from
2242                          * zero and need to use isc_refcount_increment0().
2243                          */
2244                         isc_refcount_increment0(&version->references,
2245                                                 &cur_ref);
2246                         INSIST(cur_ref == 1);
2247                         PREPEND(rbtdb->open_versions,
2248                                 rbtdb->current_version, link);
2249                         resigned_list = version->resigned_list;
2250                         ISC_LIST_INIT(version->resigned_list);
2251                 } else {
2252                         /*
2253                          * We're rolling back this transaction.
2254                          */
2255                         cleanup_list = version->changed_list;
2256                         ISC_LIST_INIT(version->changed_list);
2257                         resigned_list = version->resigned_list;
2258                         ISC_LIST_INIT(version->resigned_list);
2259                         rollback = ISC_TRUE;
2260                         cleanup_version = version;
2261                         rbtdb->future_version = NULL;
2262                 }
2263         } else {
2264                 if (version != rbtdb->current_version) {
2265                         /*
2266                          * There are no external or internal references
2267                          * to this version and it can be cleaned up.
2268                          */
2269                         cleanup_version = version;
2270
2271                         /*
2272                          * Find the version with the least serial
2273                          * number greater than ours.
2274                          */
2275                         least_greater = PREV(version, link);
2276                         if (least_greater == NULL)
2277                                 least_greater = rbtdb->current_version;
2278
2279                         INSIST(version->serial < least_greater->serial);
2280                         /*
2281                          * Is this the least open version?
2282                          */
2283                         if (version->serial == rbtdb->least_serial) {
2284                                 /*
2285                                  * Yes.  Install the new least open
2286                                  * version.
2287                                  */
2288                                 make_least_version(rbtdb,
2289                                                    least_greater,
2290                                                    &cleanup_list);
2291                         } else {
2292                                 /*
2293                                  * Add any unexecuted cleanups to
2294                                  * those of the least greater version.
2295                                  */
2296                                 APPENDLIST(least_greater->changed_list,
2297                                            version->changed_list,
2298                                            link);
2299                         }
2300                 } else if (version->serial == rbtdb->least_serial)
2301                         INSIST(EMPTY(version->changed_list));
2302                 UNLINK(rbtdb->open_versions, version, link);
2303         }
2304         least_serial = rbtdb->least_serial;
2305         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
2306
2307         /*
2308          * Update the zone's secure status.
2309          */
2310         if (writer && commit && !IS_CACHE(rbtdb))
2311                 iszonesecure(db, version, rbtdb->origin_node);
2312
2313         if (cleanup_version != NULL) {
2314                 INSIST(EMPTY(cleanup_version->changed_list));
2315                 isc_mem_put(rbtdb->common.mctx, cleanup_version,
2316                             sizeof(*cleanup_version));
2317         }
2318
2319         /*
2320          * Commit/rollback re-signed headers.
2321          */
2322         for (header = HEAD(resigned_list);
2323              header != NULL;
2324              header = HEAD(resigned_list)) {
2325                 nodelock_t *lock;
2326
2327                 ISC_LIST_UNLINK(resigned_list, header, link);
2328
2329                 lock = &rbtdb->node_locks[header->node->locknum].lock;
2330                 NODE_LOCK(lock, isc_rwlocktype_write);
2331                 if (rollback)
2332                         resign_insert(rbtdb, header->node->locknum, header);
2333                 decrement_reference(rbtdb, header->node, least_serial,
2334                                     isc_rwlocktype_write, isc_rwlocktype_none,
2335                                     ISC_FALSE);
2336                 NODE_UNLOCK(lock, isc_rwlocktype_write);
2337         }
2338
2339         if (!EMPTY(cleanup_list)) {
2340                 isc_event_t *event = NULL;
2341                 isc_rwlocktype_t tlock = isc_rwlocktype_none;
2342
2343                 if (rbtdb->task != NULL)
2344                         event = isc_event_allocate(rbtdb->common.mctx, NULL,
2345                                                    DNS_EVENT_RBTDEADNODES,
2346                                                    cleanup_dead_nodes_callback,
2347                                                    rbtdb, sizeof(isc_event_t));
2348                 if (event == NULL) {
2349                         /*
2350                          * We acquire a tree write lock here in order to make
2351                          * sure that stale nodes will be removed in
2352                          * decrement_reference().  If we didn't have the lock,
2353                          * those nodes could miss the chance to be removed
2354                          * until the server stops.  The write lock is
2355                          * expensive, but this event should be rare enough
2356                          * to justify the cost.
2357                          */
2358                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2359                         tlock = isc_rwlocktype_write;
2360                 }
2361
2362                 for (changed = HEAD(cleanup_list);
2363                      changed != NULL;
2364                      changed = next_changed) {
2365                         nodelock_t *lock;
2366
2367                         next_changed = NEXT(changed, link);
2368                         rbtnode = changed->node;
2369                         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
2370
2371                         NODE_LOCK(lock, isc_rwlocktype_write);
2372                         /*
2373                          * This is a good opportunity to purge any dead nodes,
2374                          * so use it.
2375                          */
2376                         if (event == NULL)
2377                                 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
2378
2379                         if (rollback)
2380                                 rollback_node(rbtnode, serial);
2381                         decrement_reference(rbtdb, rbtnode, least_serial,
2382                                             isc_rwlocktype_write, tlock,
2383                                             ISC_FALSE);
2384
2385                         NODE_UNLOCK(lock, isc_rwlocktype_write);
2386
2387                         isc_mem_put(rbtdb->common.mctx, changed,
2388                                     sizeof(*changed));
2389                 }
2390                 if (event != NULL) {
2391                         isc_refcount_increment(&rbtdb->references, NULL);
2392                         isc_task_send(rbtdb->task, &event);
2393                 } else
2394                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
2395         }
2396
2397  end:
2398         *versionp = NULL;
2399 }
2400
2401 /*
2402  * Add the necessary magic for the wildcard name 'name'
2403  * to be found in 'rbtdb'.
2404  *
2405  * In order for wildcard matching to work correctly in
2406  * zone_find(), we must ensure that a node for the wildcarding
2407  * level exists in the database, and has its 'find_callback'
2408  * and 'wild' bits set.
2409  *
2410  * E.g. if the wildcard name is "*.sub.example." then we
2411  * must ensure that "sub.example." exists and is marked as
2412  * a wildcard level.
      *
      * 'name' must contain at least two labels (enforced by INSIST).
      *
      * Returns ISC_R_SUCCESS when the wildcarding-level node was created
      * or already existed; any other dns_rbt_addnode() result is passed
      * back to the caller unchanged.
      *
      * findnode() calls this while holding the tree write lock.
2413  */
2414 static isc_result_t
2415 add_wildcard_magic(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2416         isc_result_t result;
2417         dns_name_t foundname;
2418         dns_offsets_t offsets;
2419         unsigned int n;
2420         dns_rbtnode_t *node = NULL;
2421
2422         dns_name_init(&foundname, offsets);
             /*
              * Build 'foundname' from 'name' with its leftmost label
              * removed: start at label 1 and take the remaining n - 1
              * labels.
              */
2423         n = dns_name_countlabels(name);
2424         INSIST(n >= 2);
2425         n--;
2426         dns_name_getlabelsequence(name, 1, n, &foundname);
             /*
              * Both "created" and "already exists" are acceptable; the
              * flags below are set on the node in either case.
              */
2427         result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
2428         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2429                 return (result);
             /* Only a freshly created node gets its 'nsec' field set here. */
2430         if (result == ISC_R_SUCCESS)
2431                 node->nsec = DNS_RBT_NSEC_NORMAL;
2432         node->find_callback = 1;
2433         node->wild = 1;
2434         return (ISC_R_SUCCESS);
2435 }
2436
/*
 * Examine every trailing label sequence of 'name' that has more labels
 * than the database origin but fewer labels than 'name' itself.  For each
 * one that is a wildcard name, mark its wildcarding level through
 * add_wildcard_magic() and make sure a tree node exists for the wildcard
 * name itself.
 *
 * Returns ISC_R_SUCCESS, or the first failure reported by
 * add_wildcard_magic() or dns_rbt_addnode() (ISC_R_EXISTS is not treated
 * as a failure).
 *
 * NOTE(review): presumably 'name' is at or below the database origin;
 * nothing here checks that -- confirm against callers.
 */
2437 static isc_result_t
2438 add_empty_wildcards(dns_rbtdb_t *rbtdb, dns_name_t *name) {
2439         isc_result_t result;
2440         dns_name_t foundname;
2441         dns_offsets_t offsets;
2442         unsigned int n, l, i;
2443
2444         dns_name_init(&foundname, offsets);
             /* n: labels in 'name'; l: labels in the database origin. */
2445         n = dns_name_countlabels(name);
2446         l = dns_name_countlabels(&rbtdb->common.origin);
             /* 'i' is the number of trailing labels of 'name' being examined. */
2447         i = l + 1;
2448         while (i < n) {
2449                 dns_rbtnode_t *node = NULL;     /* dummy */
                     /* 'foundname' = the last 'i' labels of 'name'. */
2450                 dns_name_getlabelsequence(name, n - i, i, &foundname);
2451                 if (dns_name_iswildcard(&foundname)) {
2452                         result = add_wildcard_magic(rbtdb, &foundname);
2453                         if (result != ISC_R_SUCCESS)
2454                                 return (result);
                             /*
                              * Ensure the wildcard name itself has a node;
                              * a pre-existing node is left untouched.
                              */
2455                         result = dns_rbt_addnode(rbtdb->tree, &foundname,
2456                                                  &node);
2457                         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
2458                                 return (result);
2459                         if (result == ISC_R_SUCCESS)
2460                                 node->nsec = DNS_RBT_NSEC_NORMAL;
2461                 }
2462                 i++;
2463         }
2464         return (ISC_R_SUCCESS);
2465 }
2466
/*
 * Look up 'name' in the main tree of 'db' and hand the node back through
 * '*nodep'.  When the name is absent and 'create' is true, the node is
 * added (together with any wildcard bookkeeping it requires).
 *
 * The lookup runs under the tree read lock.  Creation drops that lock and
 * re-acquires the tree lock for writing.
 *
 * Returns:
 *   ISC_R_SUCCESS   '*nodep' is set; the node has been passed through
 *                   reactivate_node().
 *   ISC_R_NOTFOUND  'create' is false and the lookup returned
 *                   DNS_R_PARTIALMATCH.
 *   other           the failing result of dns_rbt_findnode() (when
 *                   'create' is false), dns_rbt_addnode() or
 *                   add_wildcard_magic().
 */
2467 static isc_result_t
2468 findnode(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2469          dns_dbnode_t **nodep)
2470 {
2471         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2472         dns_rbtnode_t *node = NULL;
2473         dns_name_t nodename;
2474         isc_result_t result;
2475         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2476
2477         REQUIRE(VALID_RBTDB(rbtdb));
2478
2479         dns_name_init(&nodename, NULL);
2480         RWLOCK(&rbtdb->tree_lock, locktype);
2481         result = dns_rbt_findnode(rbtdb->tree, name, NULL, &node, NULL,
2482                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2483         if (result != ISC_R_SUCCESS) {
2484                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2485                 if (!create) {
                             /* A partial match is reported as "not found". */
2486                         if (result == DNS_R_PARTIALMATCH)
2487                                 result = ISC_R_NOTFOUND;
2488                         return (result);
2489                 }
2490                 /*
2491                  * It would be nice to try to upgrade the lock instead of
2492                  * unlocking then relocking.
2493                  */
2494                 locktype = isc_rwlocktype_write;
2495                 RWLOCK(&rbtdb->tree_lock, locktype);
2496                 node = NULL;
2497                 result = dns_rbt_addnode(rbtdb->tree, name, &node);
2498                 if (result == ISC_R_SUCCESS) {
                             /*
                              * New node: pick its node-lock bucket from the
                              * node's hash modulo the number of node locks.
                              */
2499                         dns_rbt_namefromnode(node, &nodename);
2500 #ifdef DNS_RBT_USEHASH
2501                         node->locknum = node->hashval % rbtdb->node_lock_count;
2502 #else
2503                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2504                                 rbtdb->node_lock_count;
2505 #endif
                             /*
                              * NOTE(review): the result of
                              * add_empty_wildcards() is not checked here --
                              * confirm that this is intentional.
                              */
2506                         add_empty_wildcards(rbtdb, name);
2507
2508                         if (dns_name_iswildcard(name)) {
2509                                 result = add_wildcard_magic(rbtdb, name);
2510                                 if (result != ISC_R_SUCCESS) {
2511                                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2512                                         return (result);
2513                                 }
2514                         }
2515                 } else if (result != ISC_R_EXISTS) {
                             /*
                              * ISC_R_EXISTS falls through to the common path
                              * below, which uses 'node' as filled in by
                              * dns_rbt_addnode().  Presumably the node was
                              * added by someone else while the tree lock was
                              * released -- TODO confirm.
                              */
2516                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2517                         return (result);
2518                 }
2519         }
             /* 'locktype' tells reactivate_node() which tree lock we hold. */
2520         reactivate_node(rbtdb, node, locktype);
2521         RWUNLOCK(&rbtdb->tree_lock, locktype);
2522
2523         *nodep = (dns_dbnode_t *)node;
2524
2525         return (ISC_R_SUCCESS);
2526 }
2527
/*
 * Look up 'name' in the NSEC3 tree (rbtdb->nsec3) of 'db' and hand the node
 * back through '*nodep'.  When the name is absent and 'create' is true, the
 * node is added and tagged DNS_RBT_NSEC_NSEC3.
 *
 * The lookup runs under the tree read lock.  Creation drops that lock and
 * re-acquires the tree lock for writing.
 *
 * Returns:
 *   ISC_R_SUCCESS   '*nodep' is set and new_reference() has been called
 *                   on the node.
 *   ISC_R_NOTFOUND  'create' is false and the lookup returned
 *                   DNS_R_PARTIALMATCH.
 *   other           the failing result of dns_rbt_findnode() (when
 *                   'create' is false) or dns_rbt_addnode().
 */
2528 static isc_result_t
2529 findnsec3node(dns_db_t *db, dns_name_t *name, isc_boolean_t create,
2530               dns_dbnode_t **nodep)
2531 {
2532         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
2533         dns_rbtnode_t *node = NULL;
2534         dns_name_t nodename;
2535         isc_result_t result;
2536         isc_rwlocktype_t locktype = isc_rwlocktype_read;
2537
2538         REQUIRE(VALID_RBTDB(rbtdb));
2539
2540         dns_name_init(&nodename, NULL);
2541         RWLOCK(&rbtdb->tree_lock, locktype);
2542         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL, &node, NULL,
2543                                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
2544         if (result != ISC_R_SUCCESS) {
2545                 RWUNLOCK(&rbtdb->tree_lock, locktype);
2546                 if (!create) {
                             /* A partial match is reported as "not found". */
2547                         if (result == DNS_R_PARTIALMATCH)
2548                                 result = ISC_R_NOTFOUND;
2549                         return (result);
2550                 }
2551                 /*
2552                  * It would be nice to try to upgrade the lock instead of
2553                  * unlocking then relocking.
2554                  */
2555                 locktype = isc_rwlocktype_write;
2556                 RWLOCK(&rbtdb->tree_lock, locktype);
2557                 node = NULL;
2558                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
2559                 if (result == ISC_R_SUCCESS) {
                             /*
                              * New node: pick its node-lock bucket from the
                              * node's hash modulo the number of node locks,
                              * then tag it as belonging to the NSEC3 tree.
                              */
2560                         dns_rbt_namefromnode(node, &nodename);
2561 #ifdef DNS_RBT_USEHASH
2562                         node->locknum = node->hashval % rbtdb->node_lock_count;
2563 #else
2564                         node->locknum = dns_name_hash(&nodename, ISC_TRUE) %
2565                                 rbtdb->node_lock_count;
2566 #endif
2567                         node->nsec = DNS_RBT_NSEC_NSEC3;
2568                 } else if (result != ISC_R_EXISTS) {
                             /*
                              * ISC_R_EXISTS falls through to the common path
                              * below, which uses 'node' as filled in by
                              * dns_rbt_addnode().
                              */
2569                         RWUNLOCK(&rbtdb->tree_lock, locktype);
2570                         return (result);
2571                 }
2572         } else {
                     /* Anything found in the NSEC3 tree must be tagged NSEC3. */
2573                 INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
2574         }
             /*
              * Take the reference under the node's lock, while the tree lock
              * is still held.
              */
2575         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
2576         new_reference(rbtdb, node);
2577         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
2578         RWUNLOCK(&rbtdb->tree_lock, locktype);
2579
2580         *nodep = (dns_dbnode_t *)node;
2581
2582         return (ISC_R_SUCCESS);
2583 }
2584
/*
 * Node callback that records the first zone cut met during a search.
 *
 * 'node' is the node being visited, 'name' its name, and 'arg' the
 * rbtdb_search_t describing the search in progress.
 *
 * Under the node read lock, the node's rdatasets are scanned for a DNAME
 * (plus its signature) or an NS rdataset visible at search->serial.  NS at
 * the origin node is ignored unless the database is a stub.  When a cut is
 * found, a reference is taken on 'node' and search->zonecut,
 * search->zonecut_rdataset, search->zonecut_sigrdataset and
 * search->need_cleanup are filled in, and search->wild is cleared.
 *
 * Returns:
 *   DNS_R_PARTIALMATCH  a zone cut was found and DNS_DBFIND_GLUEOK is not
 *                       set in search->options.
 *   DNS_R_CONTINUE      otherwise (a cut was already recorded, none was
 *                       found here, or glue is acceptable).
 */
2585 static isc_result_t
2586 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
2587         rbtdb_search_t *search = arg;
2588         rdatasetheader_t *header, *header_next;
2589         rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
2590         rdatasetheader_t *found;
2591         isc_result_t result;
2592         dns_rbtnode_t *onode;
2593
2594         /*
2595          * We only want to remember the topmost zone cut, since it's the one
2596          * that counts, so we'll just continue if we've already found a
2597          * zonecut.
2598          */
2599         if (search->zonecut != NULL)
2600                 return (DNS_R_CONTINUE);
2601
2602         found = NULL;
2603         result = DNS_R_CONTINUE;
2604         onode = search->rbtdb->origin_node;
2605
2606         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2607                   isc_rwlocktype_read);
2608
2609         /*
2610          * Look for an NS or DNAME rdataset active in our version.
2611          */
2612         ns_header = NULL;
2613         dname_header = NULL;
2614         sigdname_header = NULL;
2615         for (header = node->data; header != NULL; header = header_next) {
                     /*
                      * Save the sibling pointer first: 'header' is reassigned
                      * below while walking the 'down' chain.
                      */
2616                 header_next = header->next;
2617                 if (header->type == dns_rdatatype_ns ||
2618                     header->type == dns_rdatatype_dname ||
2619                     header->type == RBTDB_RDATATYPE_SIGDNAME) {
                             /*
                              * Follow 'down' until reaching a header whose
                              * serial is not newer than the search serial and
                              * which is not marked IGNORE.
                              */
2620                         do {
2621                                 if (header->serial <= search->serial &&
2622                                     !IGNORE(header)) {
2623                                         /*
2624                                          * Is this a "this rdataset doesn't
2625                                          * exist" record?
2626                                          */
2627                                         if (NONEXISTENT(header))
2628                                                 header = NULL;
2629                                         break;
2630                                 } else
2631                                         header = header->down;
2632                         } while (header != NULL);
2633                         if (header != NULL) {
2634                                 if (header->type == dns_rdatatype_dname)
2635                                         dname_header = header;
2636                                 else if (header->type ==
2637                                            RBTDB_RDATATYPE_SIGDNAME)
2638                                         sigdname_header = header;
2639                                 else if (node != onode ||
2640                                          IS_STUB(search->rbtdb)) {
2641                                         /*
2642                                          * We've found an NS rdataset that
2643                                          * isn't at the origin node.  We check
2644                                          * that they're not at the origin node,
2645                                          * because otherwise we'd erroneously
2646                                          * treat the zone top as if it were
2647                                          * a delegation.
2648                                          */
2649                                         ns_header = header;
2650                                 }
2651                         }
2652                 }
2653         }
2654
2655         /*
2656          * Did we find anything?
2657          */
2658         if (dname_header != NULL) {
2659                 /*
2660                  * Note that DNAME has precedence over NS if both exist.
2661                  */
2662                 found = dname_header;
2663                 search->zonecut_sigrdataset = sigdname_header;
2664         } else if (ns_header != NULL) {
2665                 found = ns_header;
2666                 search->zonecut_sigrdataset = NULL;
2667         }
2668
2669         if (found != NULL) {
2670                 /*
2671                  * We increment the reference count on node to ensure that
2672                  * search->zonecut_rdataset will still be valid later.
2673                  */
2674                 new_reference(search->rbtdb, node);
2675                 search->zonecut = node;
2676                 search->zonecut_rdataset = found;
2677                 search->need_cleanup = ISC_TRUE;
2678                 /*
2679                  * Since we've found a zonecut, anything beneath it is
2680                  * glue and is not subject to wildcard matching, so we
2681                  * may clear search->wild.
2682                  */
2683                 search->wild = ISC_FALSE;
2684                 if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
2685                         /*
2686                          * If the caller does not want to find glue, then
2687                          * this is the best answer and the search should
2688                          * stop now.
2689                          */
2690                         result = DNS_R_PARTIALMATCH;
2691                 } else {
2692                         dns_name_t *zcname;
2693
2694                         /*
2695                          * The search will continue beneath the zone cut.
2696                          * This may or may not be the best match.  In case it
2697                          * is, we need to remember the node name.
2698                          */
2699                         zcname = dns_fixedname_name(&search->zonecut_name);
2700                         RUNTIME_CHECK(dns_name_copy(name, zcname, NULL) ==
2701                                       ISC_R_SUCCESS);
2702                         search->copy_name = ISC_TRUE;
2703                 }
2704         } else {
2705                 /*
2706                  * There is no zonecut at this node which is active in this
2707                  * version.
2708                  *
2709                  * If this is a "wild" node and the caller hasn't disabled
2710                  * wildcard matching, remember that we've seen a wild node
2711                  * in case we need to go searching for wildcard matches
2712                  * later on.
2713                  */
2714                 if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0)
2715                         search->wild = ISC_TRUE;
2716         }
2717
2718         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2719                     isc_rwlocktype_read);
2720
2721         return (result);
2722 }
2723
/*
 * Associate 'rdataset' with the rdataset stored in 'header' at 'node'.
 *
 * Does nothing when 'rdataset' is NULL.  Otherwise a new reference is taken
 * on 'node' and the rdataset's class, type, covered type, TTL (relative to
 * 'now'), trust, attributes, iterator state, noqname/closest proofs and
 * re-signing information are filled in from 'rbtdb' and 'header'.
 *
 * 'rdataset' must be disassociated on entry (its 'methods' pointer must
 * be NULL).
 */
2724 static inline void
2725 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
2726               rdatasetheader_t *header, isc_stdtime_t now,
2727               dns_rdataset_t *rdataset)
2728 {
2729         unsigned char *raw;     /* RDATASLAB */
2730
2731         /*
2732          * Caller must be holding the node reader lock.
2733          * XXXJT: technically, we need a writer lock, since we'll increment
2734          * the header count below.  However, since the actual counter value
2735          * doesn't matter, we prioritize performance here.  (We may want to
2736          * use atomic increment when available).
2737          */
2738
2739         if (rdataset == NULL)
2740                 return;
2741
2742         new_reference(rbtdb, node);
2743
2744         INSIST(rdataset->methods == NULL);      /* We must be disassociated. */
2745
2746         rdataset->methods = &rdataset_methods;
2747         rdataset->rdclass = rbtdb->common.rdclass;
             /* header->type packs both the base type and the covered type. */
2748         rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
2749         rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
2750         rdataset->ttl = header->rdh_ttl - now;
2751         rdataset->trust = header->trust;
2752         if (NXDOMAIN(header))
2753                 rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
2754         if (OPTOUT(header))
2755                 rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
2756         rdataset->private1 = rbtdb;
2757         rdataset->private2 = node;
             /* The raw slab data starts immediately after the header struct. */
2758         raw = (unsigned char *)header + sizeof(*header);
2759         rdataset->private3 = raw;
             /*
              * Hand out the header's current counter value and bump it;
              * ISC_UINT32_MAX is mapped to 0 in the rdataset.
              */
2760         rdataset->count = header->count++;
2761         if (rdataset->count == ISC_UINT32_MAX)
2762                 rdataset->count = 0;
2763
2764         /*
2765          * Reset iterator state.
2766          */
2767         rdataset->privateuint4 = 0;
2768         rdataset->private5 = NULL;
2769
2770         /*
2771          * Add noqname proof.
2772          */
2773         rdataset->private6 = header->noqname;
2774         if (rdataset->private6 != NULL)
2775                 rdataset->attributes |=  DNS_RDATASETATTR_NOQNAME;
2776         rdataset->private7 = header->closest;
2777         if (rdataset->private7 != NULL)
2778                 rdataset->attributes |=  DNS_RDATASETATTR_CLOSEST;
2779
2780         /*
2781          * Copy out re-signing information.
2782          */
2783         if (RESIGN(header)) {
2784                 rdataset->attributes |=  DNS_RDATASETATTR_RESIGN;
2785                 rdataset->resign = header->resign;
2786         } else
2787                 rdataset->resign = 0;
2788 }
2789
/*
 * Report the zone cut recorded in 'search' to the caller.
 *
 * Optionally copies the saved zone cut name into 'foundname' (only when
 * search->copy_name is set), stores the zone cut node in '*nodep', and
 * binds the zone cut rdataset to 'rdataset'.  'sigrdataset' is bound only
 * when 'rdataset' is also non-NULL and a signature rdataset was recorded.
 * Any of the output pointers may be NULL.
 *
 * Returns:
 *   DNS_R_DNAME       the zone cut rdataset has type DNAME.
 *   DNS_R_DELEGATION  any other zone cut rdataset type.
 *   other             dns_name_copy() failed; nothing else was modified.
 */
2790 static inline isc_result_t
2791 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
2792                  dns_name_t *foundname, dns_rdataset_t *rdataset,
2793                  dns_rdataset_t *sigrdataset)
2794 {
2795         isc_result_t result;
2796         dns_name_t *zcname;
2797         rbtdb_rdatatype_t type;
2798         dns_rbtnode_t *node;
2799
2800         /*
2801          * The caller MUST NOT be holding any node locks.
2802          */
2803
2804         node = search->zonecut;
2805         type = search->zonecut_rdataset->type;
2806
2807         /*
2808          * If we have to set foundname, we do it before anything else.
2809          * If we were to set foundname after we had set nodep or bound the
2810          * rdataset, then we'd have to undo that work if dns_name_copy()
2811          * failed.  By setting foundname first, there's nothing to undo if
2812          * we have trouble.
2813          */
2814         if (foundname != NULL && search->copy_name) {
2815                 zcname = dns_fixedname_name(&search->zonecut_name);
2816                 result = dns_name_copy(zcname, foundname, NULL);
2817                 if (result != ISC_R_SUCCESS)
2818                         return (result);
2819         }
2820         if (nodep != NULL) {
2821                 /*
2822                  * Note that we don't have to increment the node's reference
2823                  * count here because we're going to use the reference we
2824                  * already have in the search block.
2825                  */
2826                 *nodep = node;
2827                 search->need_cleanup = ISC_FALSE;
2828         }
2829         if (rdataset != NULL) {
                     /*
                      * bind_rdataset() is called with the node read lock
                      * held.
                      */
2830                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2831                           isc_rwlocktype_read);
2832                 bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
2833                               search->now, rdataset);
2834                 if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
2835                         bind_rdataset(search->rbtdb, node,
2836                                       search->zonecut_sigrdataset,
2837                                       search->now, sigrdataset);
2838                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
2839                             isc_rwlocktype_read);
2840         }
2841
2842         if (type == dns_rdatatype_dname)
2843                 return (DNS_R_DNAME);
2844         return (DNS_R_DELEGATION);
2845 }
2846
/*
 * Decide whether an rdataset of type 'type' at 'node', owned by 'name', is
 * acceptable as glue for the zone cut recorded in 'search'.
 *
 * The type must be A, AAAA or A6, or NS when 'node' is the zone cut node
 * itself.  In addition, 'name' must equal the name at the start of one of
 * the rdata in search->zonecut_rdataset (presumably the NS targets of the
 * delegation -- TODO confirm that callers only reach this with an NS zone
 * cut).
 *
 * Returns ISC_TRUE when both conditions hold, ISC_FALSE otherwise.
 */
2847 static inline isc_boolean_t
2848 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
2849            dns_rbtnode_t *node)
2850 {
2851         unsigned char *raw;     /* RDATASLAB */
2852         unsigned int count, size;
2853         dns_name_t ns_name;
2854         isc_boolean_t valid = ISC_FALSE;
2855         dns_offsets_t offsets;
2856         isc_region_t region;
2857         rdatasetheader_t *header;
2858
2859         /*
2860          * No additional locking is required.
2861          */
2862
2863         /*
2864          * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
2865          * if it occurs at a zone cut, but is not valid below it.
2866          */
2867         if (type == dns_rdatatype_ns) {
2868                 if (node != search->zonecut) {
2869                         return (ISC_FALSE);
2870                 }
2871         } else if (type != dns_rdatatype_a &&
2872                    type != dns_rdatatype_aaaa &&
2873                    type != dns_rdatatype_a6) {
2874                 return (ISC_FALSE);
2875         }
2876
             /*
              * The slab follows the header.  It starts with a 2-byte
              * big-endian record count; when DNS_RDATASET_FIXED is set, a
              * further 4 bytes per record are skipped before the first
              * record.
              */
2877         header = search->zonecut_rdataset;
2878         raw = (unsigned char *)header + sizeof(*header);
2879         count = raw[0] * 256 + raw[1];
2880 #if DNS_RDATASET_FIXED
2881         raw += 2 + (4 * count);
2882 #else
2883         raw += 2;
2884 #endif
2885
2886         while (count > 0) {
2887                 count--;
                     /*
                      * Each record starts with a 2-byte big-endian length; with
                      * DNS_RDATASET_FIXED two more bytes are skipped before
                      * the rdata.
                      */
2888                 size = raw[0] * 256 + raw[1];
2889 #if DNS_RDATASET_FIXED
2890                 raw += 4;
2891 #else
2892                 raw += 2;
2893 #endif
2894                 region.base = raw;
2895                 region.length = size;
2896                 raw += size;
2897                 /*
2898                  * XXX Until we have rdata structures, we have no choice but
2899                  * to directly access the rdata format.
2900                  */
2901                 dns_name_init(&ns_name, offsets);
2902                 dns_name_fromregion(&ns_name, &region);
2903                 if (dns_name_compare(&ns_name, name) == 0) {
2904                         valid = ISC_TRUE;
2905                         break;
2906                 }
2907         }
2908
2909         return (valid);
2910 }
2911
/*
 * Advance 'chain' to the next node that holds at least one rdataset header
 * which is visible at search->serial, is not marked IGNORE and EXISTS, and
 * report whether that node's full name is a subdomain of 'name'.
 *
 * Note that the caller's 'chain' is moved by this function.
 *
 * Returns ISC_TRUE when such a node was found and lies at or below 'name';
 * ISC_FALSE when no such node was found, when it lies elsewhere, or when a
 * chain or name operation failed.
 */
2912 static inline isc_boolean_t
2913 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
2914             dns_name_t *name)
2915 {
2916         dns_fixedname_t fnext;
2917         dns_fixedname_t forigin;
2918         dns_name_t *next;
2919         dns_name_t *origin;
2920         dns_name_t prefix;
2921         dns_rbtdb_t *rbtdb;
2922         dns_rbtnode_t *node;
2923         isc_result_t result;
2924         isc_boolean_t answer = ISC_FALSE;
2925         rdatasetheader_t *header;
2926
2927         rbtdb = search->rbtdb;
2928
2929         dns_name_init(&prefix, NULL);
2930         dns_fixedname_init(&fnext);
2931         next = dns_fixedname_name(&fnext);
2932         dns_fixedname_init(&forigin);
2933         origin = dns_fixedname_name(&forigin);
2934
2935         result = dns_rbtnodechain_next(chain, NULL, NULL);
2936         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
2937                 node = NULL;
2938                 result = dns_rbtnodechain_current(chain, &prefix,
2939                                                   origin, &node);
2940                 if (result != ISC_R_SUCCESS)
2941                         break;
                     /*
                      * Scan this node's headers under its read lock; 'header'
                      * is left non-NULL when an active one was found.
                      */
2942                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
2943                           isc_rwlocktype_read);
2944                 for (header = node->data;
2945                      header != NULL;
2946                      header = header->next) {
2947                         if (header->serial <= search->serial &&
2948                             !IGNORE(header) && EXISTS(header))
2949                                 break;
2950                 }
2951                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
2952                             isc_rwlocktype_read);
2953                 if (header != NULL)
2954                         break;
2955                 result = dns_rbtnodechain_next(chain, NULL, NULL);
2956         }
             /*
              * 'result' is ISC_R_SUCCESS here only when the loop stopped on a
              * node with an active header; build that node's full name from
              * its prefix and origin.
              */
2957         if (result == ISC_R_SUCCESS)
2958                 result = dns_name_concatenate(&prefix, origin, next, NULL);
2959         if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name))
2960                 answer = ISC_TRUE;
2961         return (answer);
2962 }
2963
2964 static inline isc_boolean_t
2965 activeemtpynode(rbtdb_search_t *search, dns_name_t *qname, dns_name_t *wname) {
2966         dns_fixedname_t fnext;
2967         dns_fixedname_t forigin;
2968         dns_fixedname_t fprev;
2969         dns_name_t *next;
2970         dns_name_t *origin;
2971         dns_name_t *prev;
2972         dns_name_t name;
2973         dns_name_t rname;
2974         dns_name_t tname;
2975         dns_rbtdb_t *rbtdb;
2976         dns_rbtnode_t *node;
2977         dns_rbtnodechain_t chain;
2978         isc_boolean_t check_next = ISC_TRUE;
2979         isc_boolean_t check_prev = ISC_TRUE;
2980         isc_boolean_t answer = ISC_FALSE;
2981         isc_result_t result;
2982         rdatasetheader_t *header;
2983         unsigned int n;
2984
2985         rbtdb = search->rbtdb;
2986
2987         dns_name_init(&name, NULL);
2988         dns_name_init(&tname, NULL);
2989         dns_name_init(&rname, NULL);
2990         dns_fixedname_init(&fnext);
2991         next = dns_fixedname_name(&fnext);
2992         dns_fixedname_init(&fprev);
2993         prev = dns_fixedname_name(&fprev);
2994         dns_fixedname_init(&forigin);
2995         origin = dns_fixedname_name(&forigin);
2996
2997         /*
2998          * Find if qname is at or below a empty node.
2999          * Use our own copy of the chain.
3000          */
3001
3002         chain = search->chain;
3003         do {
3004                 node = NULL;
3005                 result = dns_rbtnodechain_current(&chain, &name,
3006                                                   origin, &node);
3007                 if (result != ISC_R_SUCCESS)
3008                         break;
3009                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3010                           isc_rwlocktype_read);
3011                 for (header = node->data;
3012                      header != NULL;
3013                      header = header->next) {
3014                         if (header->serial <= search->serial &&
3015                             !IGNORE(header) && EXISTS(header))
3016                                 break;
3017                 }
3018                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3019                             isc_rwlocktype_read);
3020                 if (header != NULL)
3021                         break;
3022                 result = dns_rbtnodechain_prev(&chain, NULL, NULL);
3023         } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
3024         if (result == ISC_R_SUCCESS)
3025                 result = dns_name_concatenate(&name, origin, prev, NULL);
3026         if (result != ISC_R_SUCCESS)
3027                 check_prev = ISC_FALSE;
3028
3029         result = dns_rbtnodechain_next(&chain, NULL, NULL);
3030         while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3031                 node = NULL;
3032                 result = dns_rbtnodechain_current(&chain, &name,
3033                                                   origin, &node);
3034                 if (result != ISC_R_SUCCESS)
3035                         break;
3036                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3037                           isc_rwlocktype_read);
3038                 for (header = node->data;
3039                      header != NULL;
3040                      header = header->next) {
3041                         if (header->serial <= search->serial &&
3042                             !IGNORE(header) && EXISTS(header))
3043                                 break;
3044                 }
3045                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3046                             isc_rwlocktype_read);
3047                 if (header != NULL)
3048                         break;
3049                 result = dns_rbtnodechain_next(&chain, NULL, NULL);
3050         }
3051         if (result == ISC_R_SUCCESS)
3052                 result = dns_name_concatenate(&name, origin, next, NULL);
3053         if (result != ISC_R_SUCCESS)
3054                 check_next = ISC_FALSE;
3055
3056         dns_name_clone(qname, &rname);
3057
3058         /*
3059          * Remove the wildcard label to find the terminal name.
3060          */
3061         n = dns_name_countlabels(wname);
3062         dns_name_getlabelsequence(wname, 1, n - 1, &tname);
3063
3064         do {
3065                 if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
3066                     (check_next && dns_name_issubdomain(next, &rname))) {
3067                         answer = ISC_TRUE;
3068                         break;
3069                 }
3070                 /*
3071                  * Remove the left hand label.
3072                  */
3073                 n = dns_name_countlabels(&rname);
3074                 dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
3075         } while (!dns_name_equal(&rname, &tname));
3076         return (answer);
3077 }
3078
3079 static inline isc_result_t
3080 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
3081               dns_name_t *qname)
3082 {
3083         unsigned int i, j;
3084         dns_rbtnode_t *node, *level_node, *wnode;
3085         rdatasetheader_t *header;
3086         isc_result_t result = ISC_R_NOTFOUND;
3087         dns_name_t name;
3088         dns_name_t *wname;
3089         dns_fixedname_t fwname;
3090         dns_rbtdb_t *rbtdb;
3091         isc_boolean_t done, wild, active;
3092         dns_rbtnodechain_t wchain;
3093
3094         /*
3095          * Caller must be holding the tree lock and MUST NOT be holding
3096          * any node locks.
3097          */
3098
3099         /*
3100          * Examine each ancestor level.  If the level's wild bit
3101          * is set, then construct the corresponding wildcard name and
3102          * search for it.  If the wildcard node exists, and is active in
3103          * this version, we're done.  If not, then we next check to see
3104          * if the ancestor is active in this version.  If so, then there
3105          * can be no possible wildcard match and again we're done.  If not,
3106          * continue the search.
3107          */
3108
3109         rbtdb = search->rbtdb;
3110         i = search->chain.level_matches;
3111         done = ISC_FALSE;
3112         node = *nodep;
3113         do {
3114                 NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
3115                           isc_rwlocktype_read);
3116
3117                 /*
3118                  * First we try to figure out if this node is active in
3119                  * the search's version.  We do this now, even though we
3120                  * may not need the information, because it simplifies the
3121                  * locking and code flow.
3122                  */
3123                 for (header = node->data;
3124                      header != NULL;
3125                      header = header->next) {
3126                         if (header->serial <= search->serial &&
3127                             !IGNORE(header) && EXISTS(header))
3128                                 break;
3129                 }
3130                 if (header != NULL)
3131                         active = ISC_TRUE;
3132                 else
3133                         active = ISC_FALSE;
3134
3135                 if (node->wild)
3136                         wild = ISC_TRUE;
3137                 else
3138                         wild = ISC_FALSE;
3139
3140                 NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
3141                             isc_rwlocktype_read);
3142
3143                 if (wild) {
3144                         /*
3145                          * Construct the wildcard name for this level.
3146                          */
3147                         dns_name_init(&name, NULL);
3148                         dns_rbt_namefromnode(node, &name);
3149                         dns_fixedname_init(&fwname);
3150                         wname = dns_fixedname_name(&fwname);
3151                         result = dns_name_concatenate(dns_wildcardname, &name,
3152                                                       wname, NULL);
3153                         j = i;
3154                         while (result == ISC_R_SUCCESS && j != 0) {
3155                                 j--;
3156                                 level_node = search->chain.levels[j];
3157                                 dns_name_init(&name, NULL);
3158                                 dns_rbt_namefromnode(level_node, &name);
3159                                 result = dns_name_concatenate(wname,
3160                                                               &name,
3161                                                               wname,
3162                                                               NULL);
3163                         }
3164                         if (result != ISC_R_SUCCESS)
3165                                 break;
3166
3167                         wnode = NULL;
3168                         dns_rbtnodechain_init(&wchain, NULL);
3169                         result = dns_rbt_findnode(rbtdb->tree, wname,
3170                                                   NULL, &wnode, &wchain,
3171                                                   DNS_RBTFIND_EMPTYDATA,
3172                                                   NULL, NULL);
3173                         if (result == ISC_R_SUCCESS) {
3174                                 nodelock_t *lock;
3175
3176                                 /*
3177                                  * We have found the wildcard node.  If it
3178                                  * is active in the search's version, we're
3179                                  * done.
3180                                  */
3181                                 lock = &rbtdb->node_locks[wnode->locknum].lock;
3182                                 NODE_LOCK(lock, isc_rwlocktype_read);
3183                                 for (header = wnode->data;
3184                                      header != NULL;
3185                                      header = header->next) {
3186                                         if (header->serial <= search->serial &&
3187                                             !IGNORE(header) && EXISTS(header))
3188                                                 break;
3189                                 }
3190                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3191                                 if (header != NULL ||
3192                                     activeempty(search, &wchain, wname)) {
3193                                         if (activeemtpynode(search, qname,
3194                                                             wname)) {
3195                                                 return (ISC_R_NOTFOUND);
3196                                         }
3197                                         /*
3198                                          * The wildcard node is active!
3199                                          *
3200                                          * Note: result is still ISC_R_SUCCESS
3201                                          * so we don't have to set it.
3202                                          */
3203                                         *nodep = wnode;
3204                                         break;
3205                                 }
3206                         } else if (result != ISC_R_NOTFOUND &&
3207                                    result != DNS_R_PARTIALMATCH) {
3208                                 /*
3209                                  * An error has occurred.  Bail out.
3210                                  */
3211                                 break;
3212                         }
3213                 }
3214
3215                 if (active) {
3216                         /*
3217                          * The level node is active.  Any wildcarding
3218                          * present at higher levels has no
3219                          * effect and we're done.
3220                          */
3221                         result = ISC_R_NOTFOUND;
3222                         break;
3223                 }
3224
3225                 if (i > 0) {
3226                         i--;
3227                         node = search->chain.levels[i];
3228                 } else
3229                         done = ISC_TRUE;
3230         } while (!done);
3231
3232         return (result);
3233 }
3234
3235 static isc_boolean_t
3236 matchparams(rdatasetheader_t *header, rbtdb_search_t *search)
3237 {
3238         dns_rdata_t rdata = DNS_RDATA_INIT;
3239         dns_rdata_nsec3_t nsec3;
3240         unsigned char *raw;                     /* RDATASLAB */
3241         unsigned int rdlen, count;
3242         isc_region_t region;
3243         isc_result_t result;
3244
3245         REQUIRE(header->type == dns_rdatatype_nsec3);
3246
3247         raw = (unsigned char *)header + sizeof(*header);
3248         count = raw[0] * 256 + raw[1]; /* count */
3249 #if DNS_RDATASET_FIXED
3250         raw += count * 4 + 2;
3251 #else
3252         raw += 2;
3253 #endif
3254         while (count-- > 0) {
3255                 rdlen = raw[0] * 256 + raw[1];
3256 #if DNS_RDATASET_FIXED
3257                 raw += 4;
3258 #else
3259                 raw += 2;
3260 #endif
3261                 region.base = raw;
3262                 region.length = rdlen;
3263                 dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
3264                                      dns_rdatatype_nsec3, &region);
3265                 raw += rdlen;
3266                 result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
3267                 INSIST(result == ISC_R_SUCCESS);
3268                 if (nsec3.hash == search->rbtversion->hash &&
3269                     nsec3.iterations == search->rbtversion->iterations &&
3270                     nsec3.salt_length == search->rbtversion->salt_length &&
3271                     memcmp(nsec3.salt, search->rbtversion->salt,
3272                            nsec3.salt_length) == 0)
3273                         return (ISC_TRUE);
3274                 dns_rdata_reset(&rdata);
3275         }
3276         return (ISC_FALSE);
3277 }
3278
3279 static inline isc_result_t
3280 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
3281                     dns_name_t *name, dns_name_t *origin,
3282                     dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
3283                     isc_boolean_t *firstp)
3284 {
3285         dns_fixedname_t ftarget;
3286         dns_name_t *target;
3287         dns_rbtnode_t *nsecnode;
3288         isc_result_t result;
3289
3290         if (type == dns_rdatatype_nsec3) {
3291                 result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
3292                 if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN)
3293                         return (result);
3294                 result = dns_rbtnodechain_current(&search->chain, name, origin,
3295                                                   nodep);
3296                 if (result != ISC_R_SUCCESS)
3297                         return (result);
3298                 return (ISC_R_SUCCESS);
3299         }
3300
3301         dns_fixedname_init(&ftarget);
3302         target = dns_fixedname_name(&ftarget);
3303
3304         for (;;) {
3305                 if (*firstp) {
3306                         /*
3307                          * Construct the name of the second node to check.
3308                          * It is the first node sought in the NSEC tree.
3309                          */
3310                         *firstp = ISC_FALSE;
3311                         dns_rbtnodechain_init(nsecchain, NULL);
3312                         result = dns_name_concatenate(name, origin,
3313                                                       target, NULL);
3314                         if (result != ISC_R_SUCCESS)
3315                                 return (result);
3316                         nsecnode = NULL;
3317                         result = dns_rbt_findnode(search->rbtdb->nsec,
3318                                                   target, NULL,
3319                                                   &nsecnode, nsecchain,
3320                                                   DNS_RBTFIND_NOOPTIONS,
3321                                                   NULL, NULL);
3322                         if (result == ISC_R_SUCCESS) {
3323                                 /*
3324                                  * Since this was the first loop, finding the
3325                                  * name in the NSEC tree implies that the first
3326                                  * node checked in the main tree had an
3327                                  * unacceptable NSEC record.
3328                                  * Try the previous node in the NSEC tree.
3329                                  */
3330                                 result = dns_rbtnodechain_prev(nsecchain,
3331                                                         name, origin);
3332                                 if (result == DNS_R_NEWORIGIN)
3333                                         result = ISC_R_SUCCESS;
3334                         } else if (result == ISC_R_NOTFOUND
3335                                    || result == DNS_R_PARTIALMATCH) {
3336                                 result = dns_rbtnodechain_current(nsecchain,
3337                                                         name, origin, NULL);
3338                                 if (result == ISC_R_NOTFOUND)
3339                                         result = ISC_R_NOMORE;
3340                         }
3341                 } else {
3342                         /*
3343                          * This is a second or later trip through the auxiliary
3344                          * tree for the name of a third or earlier NSEC node in
3345                          * the main tree.  Previous trips through the NSEC tree
3346                          * must have found nodes in the main tree with NSEC
3347                          * records.  Perhaps they lacked signature records.
3348                          */
3349                         result = dns_rbtnodechain_prev(nsecchain, name, origin);
3350                         if (result == DNS_R_NEWORIGIN)
3351                                 result = ISC_R_SUCCESS;
3352                         if (result != ISC_R_SUCCESS)
3353                                 return (result);
3354                 }
3355                 if (result != ISC_R_SUCCESS)
3356                         return (result);
3357
3358                 /*
3359                  * Construct the name to seek in the main tree.
3360                  */
3361                 result = dns_name_concatenate(name, origin, target, NULL);
3362                 if (result != ISC_R_SUCCESS)
3363                         return (result);
3364
3365                 *nodep = NULL;
3366                 result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
3367                                           nodep, &search->chain,
3368                                           DNS_RBTFIND_NOOPTIONS, NULL, NULL);
3369                 if (result == ISC_R_SUCCESS)
3370                         return (result);
3371
3372                 /*
3373                  * There should always be a node in the main tree with the
3374                  * same name as the node in the auxiliary NSEC tree, except for
3375                  * nodes in the auxiliary tree that are awaiting deletion.
3376                  */
3377                 if (result == DNS_R_PARTIALMATCH)
3378                         result = ISC_R_NOTFOUND;
3379
3380                 if (result != ISC_R_NOTFOUND) {
3381                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
3382                                       DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
3383                                       "previous_closest_nsec(): %s",
3384                                       isc_result_totext(result));
3385                         return (DNS_R_BADDB);
3386                 }
3387         }
3388 }
3389
3390 static inline isc_result_t
3391 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
3392                   dns_name_t *foundname, dns_rdataset_t *rdataset,
3393                   dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
3394                   dns_db_secure_t secure)
3395 {
3396         dns_rbtnode_t *node, *prevnode;
3397         rdatasetheader_t *header, *header_next, *found, *foundsig;
3398         dns_rbtnodechain_t nsecchain;
3399         isc_boolean_t empty_node;
3400         isc_result_t result;
3401         dns_fixedname_t fname, forigin;
3402         dns_name_t *name, *origin;
3403         dns_rdatatype_t type;
3404         rbtdb_rdatatype_t sigtype;
3405         isc_boolean_t wraps;
3406         isc_boolean_t first = ISC_TRUE;
3407         isc_boolean_t need_sig = ISC_TF(secure == dns_db_secure);
3408
3409         if (tree == search->rbtdb->nsec3) {
3410                 type = dns_rdatatype_nsec3;
3411                 sigtype = RBTDB_RDATATYPE_SIGNSEC3;
3412                 wraps = ISC_TRUE;
3413         } else {
3414                 type = dns_rdatatype_nsec;
3415                 sigtype = RBTDB_RDATATYPE_SIGNSEC;
3416                 wraps = ISC_FALSE;
3417         }
3418
3419         /*
3420          * Use the auxiliary tree only starting with the second node in the
3421          * hope that the original node will be right much of the time.
3422          */
3423                 dns_fixedname_init(&fname);
3424                 name = dns_fixedname_name(&fname);
3425                 dns_fixedname_init(&forigin);
3426                 origin = dns_fixedname_name(&forigin);
3427  again:
3428         node = NULL;
3429         result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
3430                 if (result != ISC_R_SUCCESS)
3431                         return (result);
3432         do {
3433                 NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3434                           isc_rwlocktype_read);
3435                 found = NULL;
3436                 foundsig = NULL;
3437                 empty_node = ISC_TRUE;
3438                 for (header = node->data;
3439                      header != NULL;
3440                      header = header_next) {
3441                         header_next = header->next;
3442                         /*
3443                          * Look for an active, extant NSEC or RRSIG NSEC.
3444                          */
3445                         do {
3446                                 if (header->serial <= search->serial &&
3447                                     !IGNORE(header)) {
3448                                         /*
3449                                          * Is this a "this rdataset doesn't
3450                                          * exist" record?
3451                                          */
3452                                         if (NONEXISTENT(header))
3453                                                 header = NULL;
3454                                         break;
3455                                 } else
3456                                         header = header->down;
3457                         } while (header != NULL);
3458                         if (header != NULL) {
3459                                 /*
3460                                  * We now know that there is at least one
3461                                  * active rdataset at this node.
3462                                  */
3463                                 empty_node = ISC_FALSE;
3464                                 if (header->type == type) {
3465                                         found = header;
3466                                         if (foundsig != NULL)
3467                                                 break;
3468                                 } else if (header->type == sigtype) {
3469                                         foundsig = header;
3470                                         if (found != NULL)
3471                                                 break;
3472                                 }
3473                         }
3474                 }
3475                 if (!empty_node) {
3476                         if (found != NULL && search->rbtversion->havensec3 &&
3477                             found->type == dns_rdatatype_nsec3 &&
3478                             !matchparams(found, search)) {
3479                                 empty_node = ISC_TRUE;
3480                                 found = NULL;
3481                                 foundsig = NULL;
3482                                 result = dns_rbtnodechain_prev(&search->chain,
3483                                                                NULL, NULL);
3484                         } else if (found != NULL &&
3485                                    (foundsig != NULL || !need_sig)) {
3486                                 /*
3487                                  * We've found the right NSEC/NSEC3 record.
3488                                  *
3489                                  * Note: for this to really be the right
3490                                  * NSEC record, it's essential that the NSEC
3491                                  * records of any nodes obscured by a zone
3492                                  * cut have been removed; we assume this is
3493                                  * the case.
3494                                  */
3495                                 result = dns_name_concatenate(name, origin,
3496                                                               foundname, NULL);
3497                                 if (result == ISC_R_SUCCESS) {
3498                                         if (nodep != NULL) {
3499                                                 new_reference(search->rbtdb,
3500                                                               node);
3501                                                 *nodep = node;
3502                                         }
3503                                         bind_rdataset(search->rbtdb, node,
3504                                                       found, search->now,
3505                                                       rdataset);
3506                                         if (foundsig != NULL)
3507                                                 bind_rdataset(search->rbtdb,
3508                                                               node,
3509                                                               foundsig,
3510                                                               search->now,
3511                                                               sigrdataset);
3512                                 }
3513                         } else if (found == NULL && foundsig == NULL) {
3514                                 /*
3515                                  * This node is active, but has no NSEC or
3516                                  * RRSIG NSEC.  That means it's glue or
3517                                  * other obscured zone data that isn't
3518                                  * relevant for our search.  Treat the
3519                                  * node as if it were empty and keep looking.
3520                                  */
3521                                 empty_node = ISC_TRUE;
3522                                 result = previous_closest_nsec(type, search,
3523                                                         name, origin, &prevnode,
3524                                                         &nsecchain, &first);
3525                         } else {
3526                                 /*
3527                                  * We found an active node, but either the
3528                                  * NSEC or the RRSIG NSEC is missing.  This
3529                                  * shouldn't happen.
3530                                  */
3531                                 result = DNS_R_BADDB;
3532                         }
3533                 } else {
3534                         /*
3535                          * This node isn't active.  We've got to keep
3536                          * looking.
3537                          */
3538                         result = previous_closest_nsec(type, search,
3539                                                        name, origin, &prevnode,
3540                                                        &nsecchain, &first);
3541                 }
3542                 NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
3543                             isc_rwlocktype_read);
3544                 node = prevnode;
3545         } while (empty_node && result == ISC_R_SUCCESS);
3546
3547         if (!first)
3548                 dns_rbtnodechain_invalidate(&nsecchain);
3549
3550         if (result == ISC_R_NOMORE && wraps) {
3551                 result = dns_rbtnodechain_last(&search->chain, tree,
3552                                                NULL, NULL);
3553                 if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
3554                         wraps = ISC_FALSE;
3555                         goto again;
3556                 }
3557         }
3558
3559         /*
3560          * If the result is ISC_R_NOMORE, then we got to the beginning of
3561          * the database and didn't find a NSEC record.  This shouldn't
3562          * happen.
3563          */
3564         if (result == ISC_R_NOMORE)
3565                 result = DNS_R_BADDB;
3566
3567         return (result);
3568 }
3569
/*
 * zone_find() -- look up 'name' / 'type' in a zone database.
 *
 * Arguments, as used by the code below:
 *   db           the database; cast to dns_rbtdb_t and checked with
 *                VALID_RBTDB().
 *   name         the name to search for.
 *   version      the version to search.  If NULL, the current version is
 *                attached with currentversion() and released again with
 *                closeversion() before returning.
 *   type         the rdata type wanted.  dns_rdatatype_any matches any
 *                active rdataset; in that case nothing is bound to
 *                'rdataset' / 'sigrdataset' on the success path.
 *   options      DNS_DBFIND_* flags.  The ones examined here are
 *                FORCENSEC3 (search the nsec3 tree instead of the main
 *                tree), FORCENSEC, GLUEOK and VALIDATEGLUE.
 *   now          ignored.
 *   nodep        if non-NULL, receives a referenced node on the "found"
 *                and DNS_R_NXRRSET paths; on the other paths it is
 *                simply handed to setup_delegation() or
 *                find_closest_nsec().
 *   foundname    filled in by dns_rbt_findnode().  On a wildcard match
 *                'name' is copied into it, and DNS_NAMEATTR_WILDCARD is
 *                set when a wildcard answer or wildcard NXRRSET is
 *                returned.
 *   rdataset,
 *   sigrdataset  bound with bind_rdataset() to the answer (or to the
 *                NSEC at the node) and to the matching RRSIG.
 *
 * Results assigned directly in this function: ISC_R_SUCCESS, DNS_R_CNAME,
 * DNS_R_GLUE, DNS_R_ZONECUT, DNS_R_NXRRSET, DNS_R_NXDOMAIN,
 * DNS_R_EMPTYNAME, DNS_R_EMPTYWILD and DNS_R_BADDB.  Anything else is
 * passed through from dns_rbt_findnode(), find_wildcard(),
 * dns_name_copy(), find_closest_nsec() or setup_delegation().
 *
 * Locking: the tree lock is held in read mode from the lookup until
 * 'tree_exit'.  The node lock is held in read mode while the node's
 * rdatasets are examined; every jump to 'partial_match' or 'tree_exit'
 * from that region releases the node lock first, and 'node_exit'
 * releases it for the remaining paths.
 */
3570 static isc_result_t
3571 zone_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
3572           dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
3573           dns_dbnode_t **nodep, dns_name_t *foundname,
3574           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
3575 {
3576         dns_rbtnode_t *node = NULL;
3577         isc_result_t result;
3578         rbtdb_search_t search;
3579         isc_boolean_t cname_ok = ISC_TRUE;
3580         isc_boolean_t close_version = ISC_FALSE;
3581         isc_boolean_t maybe_zonecut = ISC_FALSE;
3582         isc_boolean_t at_zonecut = ISC_FALSE;
3583         isc_boolean_t wild;
3584         isc_boolean_t empty_node;
3585         rdatasetheader_t *header, *header_next, *found, *nsecheader;
3586         rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
3587         rbtdb_rdatatype_t sigtype;
3588         isc_boolean_t active;
3589         dns_rbtnodechain_t chain;
3590         nodelock_t *lock;
3591         dns_rbt_t *tree;
3592
3593         search.rbtdb = (dns_rbtdb_t *)db;
3594
3595         REQUIRE(VALID_RBTDB(search.rbtdb));
3596
3597         /*
3598          * We don't care about 'now'.
3599          */
3600         UNUSED(now);
3601
3602         /*
3603          * If the caller didn't supply a version, attach to the current
3604          * version.
3605          */
3606         if (version == NULL) {
3607                 currentversion(db, &version);
3608                 close_version = ISC_TRUE;
3609         }
3610
3611         search.rbtversion = version;
3612         search.serial = search.rbtversion->serial;
3613         search.options = options;
3614         search.copy_name = ISC_FALSE;
3615         search.need_cleanup = ISC_FALSE;
3616         search.wild = ISC_FALSE;
3617         search.zonecut = NULL;
3618         dns_fixedname_init(&search.zonecut_name);
3619         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
3620         search.now = 0;
3621
3622         /*
3623          * 'wild' will be true iff. we've matched a wildcard.
3624          */
3625         wild = ISC_FALSE;
3626
3627         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
3628
3629         /*
3630          * Search down from the root of the tree.  If, while going down, we
3631          * encounter a callback node, zone_zonecut_callback() will search the
3632          * rdatasets at the zone cut for active DNAME or NS rdatasets.
3633          */
3634         tree =  (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3 :
3635                                                          search.rbtdb->tree;
3636         result = dns_rbt_findnode(tree, name, foundname, &node,
3637                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
3638                                   zone_zonecut_callback, &search);
3639
3640         if (result == DNS_R_PARTIALMATCH) {
3641         partial_match:
3642                 if (search.zonecut != NULL) {
3643                     result = setup_delegation(&search, nodep, foundname,
3644                                               rdataset, sigrdataset);
3645                     goto tree_exit;
3646                 }
3647
3648                 if (search.wild) {
3649                         /*
3650                          * At least one of the levels in the search chain
3651                          * potentially has a wildcard.  For each such level,
3652                          * we must see if there's a matching wildcard active
3653                          * in the current version.
3654                          */
3655                         result = find_wildcard(&search, &node, name);
3656                         if (result == ISC_R_SUCCESS) {
3657                                 result = dns_name_copy(name, foundname, NULL);
3658                                 if (result != ISC_R_SUCCESS)
3659                                         goto tree_exit;
3660                                 wild = ISC_TRUE;
3661                                 goto found;
3662                         }
3663                         else if (result != ISC_R_NOTFOUND)
3664                                 goto tree_exit;
3665                 }
3666
                     /*
                      * activeempty() is handed a copy of the search chain.
                      * NOTE(review): presumably so that search.chain stays
                      * as the lookup left it -- confirm.  'active' selects
                      * DNS_R_EMPTYNAME over DNS_R_NXDOMAIN below.
                      */
3667                 chain = search.chain;
3668                 active = activeempty(&search, &chain, name);
3669
3670                 /*
3671                  * If we're here, then the name does not exist, is not
3672                  * beneath a zonecut, and there's no matching wildcard.
3673                  */
3674                 if ((search.rbtversion->secure == dns_db_secure &&
3675                      !search.rbtversion->havensec3) ||
3676                     (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
3677                     (search.options & DNS_DBFIND_FORCENSEC3) != 0)
3678                 {
3679                         result = find_closest_nsec(&search, nodep, foundname,
3680                                                    rdataset, sigrdataset, tree,
3681                                                    search.rbtversion->secure);
3682                         if (result == ISC_R_SUCCESS)
3683                                 result = active ? DNS_R_EMPTYNAME :
3684                                                   DNS_R_NXDOMAIN;
3685                 } else
3686                         result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
3687                 goto tree_exit;
3688         } else if (result != ISC_R_SUCCESS)
3689                 goto tree_exit;
3690
3691  found:
3692         /*
3693          * We have found a node whose name is the desired name, or we
3694          * have matched a wildcard.
3695          */
3696
3697         if (search.zonecut != NULL) {
3698                 /*
3699                  * If we're beneath a zone cut, we don't want to look for
3700                  * CNAMEs because they're not legitimate zone glue.
3701                  */
3702                 cname_ok = ISC_FALSE;
3703         } else {
3704                 /*
3705                  * The node may be a zone cut itself.  If it might be one,
3706                  * make sure we check for it later.
3707                  *
3708                  * DS records live above the zone cut in ordinary zones so
3709                  * we want to ignore any referral.
3710                  *
3711                  * Stub zones don't have anything "above" the delegation so
3712                  * we always return a referral.
3713                  */
3714                 if (node->find_callback &&
3715                     ((node != search.rbtdb->origin_node &&
3716                       !dns_rdatatype_atparent(type)) ||
3717                      IS_STUB(search.rbtdb)))
3718                         maybe_zonecut = ISC_TRUE;
3719         }
3720
3721         /*
3722          * Certain DNSSEC types are not subject to CNAME matching
3723          * (RFC4035, section 2.5 and RFC3007).
3724          *
3725          * We don't check for RRSIG, because we don't store RRSIG records
3726          * directly.
3727          */
3728         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
3729                 cname_ok = ISC_FALSE;
3730
3731         /*
3732          * We now go looking for rdata...
3733          */
3734
3735         lock = &search.rbtdb->node_locks[node->locknum].lock;
3736         NODE_LOCK(lock, isc_rwlocktype_read);
3737
3738         found = NULL;
3739         foundsig = NULL;
3740         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
3741         nsecheader = NULL;
3742         nsecsig = NULL;
3743         cnamesig = NULL;
3744         empty_node = ISC_TRUE;
3745         for (header = node->data; header != NULL; header = header_next) {
3746                 header_next = header->next;
3747                 /*
3748                  * Look for an active, extant rdataset.
                      *
                      * The 'down' chain is walked until a header with
                      * serial <= search.serial that is not IGNORE() is
                      * reached; if that header is NONEXISTENT() the type
                      * is treated as absent at this node.
3749                  */
3750                 do {
3751                         if (header->serial <= search.serial &&
3752                             !IGNORE(header)) {
3753                                 /*
3754                                  * Is this a "this rdataset doesn't
3755                                  * exist" record?
3756                                  */
3757                                 if (NONEXISTENT(header))
3758                                         header = NULL;
3759                                 break;
3760                         } else
3761                                 header = header->down;
3762                 } while (header != NULL);
3763                 if (header != NULL) {
3764                         /*
3765                          * We now know that there is at least one active
3766                          * rdataset at this node.
3767                          */
3768                         empty_node = ISC_FALSE;
3769
3770                         /*
3771                          * Do special zone cut handling, if requested.
3772                          */
3773                         if (maybe_zonecut &&
3774                             header->type == dns_rdatatype_ns) {
3775                                 /*
3776                                  * We increment the reference count on node to
3777                                  * ensure that search->zonecut_rdataset will
3778                                  * still be valid later.
3779                                  */
3780                                 new_reference(search.rbtdb, node);
3781                                 search.zonecut = node;
3782                                 search.zonecut_rdataset = header;
3783                                 search.zonecut_sigrdataset = NULL;
3784                                 search.need_cleanup = ISC_TRUE;
3785                                 maybe_zonecut = ISC_FALSE;
3786                                 at_zonecut = ISC_TRUE;
3787                                 /*
3788                                  * It is not clear if KEY should still be
3789                                  * allowed at the parent side of the zone
3790                                  * cut or not.  It is needed for RFC3007
3791                                  * validated updates.
3792                                  */
3793                                 if ((search.options & DNS_DBFIND_GLUEOK) == 0
3794                                     && type != dns_rdatatype_nsec
3795                                     && type != dns_rdatatype_key) {
3796                                         /*
3797                                          * Glue is not OK, but any answer we
3798                                          * could return would be glue.  Return
3799                                          * the delegation.
3800                                          */
3801                                         found = NULL;
3802                                         break;
3803                                 }
                                     /*
                                      * Glue is acceptable (or the type is
                                      * NSEC/KEY).  If the answer and its
                                      * RRSIG were already seen earlier in
                                      * the list, there is nothing more to
                                      * look for.
                                      */
3804                                 if (found != NULL && foundsig != NULL)
3805                                         break;
3806                         }
3807
3808
3809                         /*
3810                          * If the NSEC3 record doesn't match the chain
3811                          * we are using behave as if it isn't here.
3812                          */
3813                         if (header->type == dns_rdatatype_nsec3 &&
3814                            !matchparams(header, &search)) {
3815                                 NODE_UNLOCK(lock, isc_rwlocktype_read);
3816                                 goto partial_match;
3817                         }
3818                         /*
3819                          * If we found a type we were looking for,
3820                          * remember it.
3821                          */
3822                         if (header->type == type ||
3823                             type == dns_rdatatype_any ||
3824                             (header->type == dns_rdatatype_cname &&
3825                              cname_ok)) {
3826                                 /*
3827                                  * We've found the answer!
3828                                  */
3829                                 found = header;
3830                                 if (header->type == dns_rdatatype_cname &&
3831                                     cname_ok) {
3832                                         /*
3833                                          * We may be finding a CNAME instead
3834                                          * of the desired type.
3835                                          *
3836                                          * If we've already got the CNAME RRSIG,
3837                                          * use it, otherwise change sigtype
3838                                          * so that we find it.
3839                                          */
3840                                         if (cnamesig != NULL)
3841                                                 foundsig = cnamesig;
3842                                         else
3843                                                 sigtype =
3844                                                     RBTDB_RDATATYPE_SIGCNAME;
3845                                 }
3846                                 /*
3847                                  * If we've got all we need, end the search.
3848                                  */
3849                                 if (!maybe_zonecut && foundsig != NULL)
3850                                         break;
3851                         } else if (header->type == sigtype) {
3852                                 /*
3853                                  * We've found the RRSIG rdataset for our
3854                                  * target type.  Remember it.
3855                                  */
3856                                 foundsig = header;
3857                                 /*
3858                                  * If we've got all we need, end the search.
3859                                  */
3860                                 if (!maybe_zonecut && found != NULL)
3861                                         break;
3862                         } else if (header->type == dns_rdatatype_nsec &&
3863                                    !search.rbtversion->havensec3) {
3864                                 /*
3865                                  * Remember a NSEC rdataset even if we're
3866                                  * not specifically looking for it, because
3867                                  * we might need it later.
3868                                  */
3869                                 nsecheader = header;
3870                         } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
3871                                    !search.rbtversion->havensec3) {
3872                                 /*
3873                                  * If we need the NSEC rdataset, we'll also
3874                                  * need its signature.
3875                                  */
3876                                 nsecsig = header;
3877                         } else if (cname_ok &&
3878                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
3879                                 /*
3880                                  * If we get a CNAME match, we'll also need
3881                                  * its signature.
3882                                  */
3883                                 cnamesig = header;
3884                         }
3885                 }
3886         }
3887
3888         if (empty_node) {
3889                 /*
3890                  * We have an exact match for the name, but there are no
3891                  * active rdatasets in the desired version.  That means that
3892                  * this node doesn't exist in the desired version, and that
3893                  * we really have a partial match.
3894                  */
3895                 if (!wild) {
3896                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3897                         goto partial_match;
3898                 }
3899         }
3900
3901         /*
3902          * If we didn't find what we were looking for...
3903          */
3904         if (found == NULL) {
3905                 if (search.zonecut != NULL) {
3906                         /*
3907                          * We were trying to find glue at a node beneath a
3908                          * zone cut, but didn't.
3909                          *
3910                          * Return the delegation.
3911                          */
3912                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3913                         result = setup_delegation(&search, nodep, foundname,
3914                                                   rdataset, sigrdataset);
3915                         goto tree_exit;
3916                 }
3917                 /*
3918                  * The desired type doesn't exist.
3919                  */
3920                 result = DNS_R_NXRRSET;
3921                 if (search.rbtversion->secure == dns_db_secure &&
3922                     !search.rbtversion->havensec3 &&
3923                     (nsecheader == NULL || nsecsig == NULL)) {
3924                         /*
3925                          * The zone is secure but there's no NSEC,
3926                          * or the NSEC has no signature!
3927                          */
3928                         if (!wild) {
3929                                 result = DNS_R_BADDB;
3930                                 goto node_exit;
3931                         }
3932
3933                         NODE_UNLOCK(lock, isc_rwlocktype_read);
3934                         result = find_closest_nsec(&search, nodep, foundname,
3935                                                    rdataset, sigrdataset,
3936                                                    search.rbtdb->tree,
3937                                                    search.rbtversion->secure);
3938                         if (result == ISC_R_SUCCESS)
3939                                 result = DNS_R_EMPTYWILD;
3940                         goto tree_exit;
3941                 }
3942                 if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
3943                     nsecheader == NULL)
3944                 {
3945                         /*
3946                          * There's no NSEC record, and we were told
3947                          * to find one.
3948                          */
3949                         result = DNS_R_BADDB;
3950                         goto node_exit;
3951                 }
3952                 if (nodep != NULL) {
3953                         new_reference(search.rbtdb, node);
3954                         *nodep = node;
3955                 }
3956                 if ((search.rbtversion->secure == dns_db_secure &&
3957                      !search.rbtversion->havensec3) ||
3958                     (search.options & DNS_DBFIND_FORCENSEC) != 0)
3959                 {
3960                         bind_rdataset(search.rbtdb, node, nsecheader,
3961                                       0, rdataset);
3962                         if (nsecsig != NULL)
3963                                 bind_rdataset(search.rbtdb, node,
3964                                               nsecsig, 0, sigrdataset);
3965                 }
3966                 if (wild)
3967                         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
3968                 goto node_exit;
3969         }
3970
3971         /*
3972          * We found what we were looking for, or we found a CNAME.
3973          */
3974
3975         if (type != found->type &&
3976             type != dns_rdatatype_any &&
3977             found->type == dns_rdatatype_cname) {
3978                 /*
3979                  * We weren't doing an ANY query and we found a CNAME instead
3980                  * of the type we were looking for, so we need to indicate
3981                  * that result to the caller.
3982                  */
3983                 result = DNS_R_CNAME;
3984         } else if (search.zonecut != NULL) {
3985                 /*
3986                  * If we're beneath a zone cut, we must indicate that the
3987                  * result is glue, unless we're actually at the zone cut
3988                  * and the type is NSEC or KEY.
3989                  */
3990                 if (search.zonecut == node) {
3991                         /*
3992                          * It is not clear if KEY should still be
3993                          * allowed at the parent side of the zone
3994                          * cut or not.  It is needed for RFC3007
3995                          * validated updates.
3996                          */
3997                         if (type == dns_rdatatype_nsec ||
3998                             type == dns_rdatatype_nsec3 ||
3999                             type == dns_rdatatype_key)
4000                                 result = ISC_R_SUCCESS;
4001                         else if (type == dns_rdatatype_any)
4002                                 result = DNS_R_ZONECUT;
4003                         else
4004                                 result = DNS_R_GLUE;
4005                 } else
4006                         result = DNS_R_GLUE;
4007                 /*
4008                  * We might have found data that isn't glue, but was occluded
4009                  * by a dynamic update.  If the caller cares about this, they
4010                  * will have told us to validate glue.
4011                  *
4012                  * XXX We should cache the glue validity state!
4013                  */
4014                 if (result == DNS_R_GLUE &&
4015                     (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
4016                     !valid_glue(&search, foundname, type, node)) {
4017                         NODE_UNLOCK(lock, isc_rwlocktype_read);
4018                         result = setup_delegation(&search, nodep, foundname,
4019                                                   rdataset, sigrdataset);
4020                     goto tree_exit;
4021                 }
4022         } else {
4023                 /*
4024                  * An ordinary successful query!
4025                  */
4026                 result = ISC_R_SUCCESS;
4027         }
4028
             /*
              * Hand the node to the caller.  If the zone cut was detected
              * at this very node during the scan above (at_zonecut), a
              * reference was already taken there; that reference is passed
              * on and need_cleanup is cleared so that the code after
              * tree_exit does not drop it.  If nodep is NULL, need_cleanup
              * stays set and the reference is released below.
              */
4029         if (nodep != NULL) {
4030                 if (!at_zonecut)
4031                         new_reference(search.rbtdb, node);
4032                 else
4033                         search.need_cleanup = ISC_FALSE;
4034                 *nodep = node;
4035         }
4036
             /*
              * Nothing is bound for an ANY query.
              * NOTE(review): presumably the caller walks the node's
              * rdatasets itself in that case -- confirm.
              */
4037         if (type != dns_rdatatype_any) {
4038                 bind_rdataset(search.rbtdb, node, found, 0, rdataset);
4039                 if (foundsig != NULL)
4040                         bind_rdataset(search.rbtdb, node, foundsig, 0,
4041                                       sigrdataset);
4042         }
4043
4044         if (wild)
4045                 foundname->attributes |= DNS_NAMEATTR_WILDCARD;
4046
4047  node_exit:
4048         NODE_UNLOCK(lock, isc_rwlocktype_read);
4049
4050  tree_exit:
4051         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4052
4053         /*
4054          * If we found a zonecut but aren't going to use it, we have to
4055          * let go of it.
4056          */
4057         if (search.need_cleanup) {
4058                 node = search.zonecut;
4059                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4060
4061                 NODE_LOCK(lock, isc_rwlocktype_read);
4062                 decrement_reference(search.rbtdb, node, 0,
4063                                     isc_rwlocktype_read, isc_rwlocktype_none,
4064                                     ISC_FALSE);
4065                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4066         }
4067
4068         if (close_version)
4069                 closeversion(db, &version, ISC_FALSE);
4070
4071         dns_rbtnodechain_reset(&search.chain);
4072
4073         return (result);
4074 }
4075
/*
 * zone_findzonecut() -- placeholder that must never be called.
 *
 * Every argument is ignored.  The function reports a fatal error through
 * FATAL_ERROR(); the return statement that follows is marked NOTREACHED
 * and only exists to give the function a value of the declared type.
 * NOTE(review): presumably this fills the findzonecut slot for zone
 * databases, where the operation is not supported -- confirm against the
 * method table.
 */
4076 static isc_result_t
4077 zone_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4078                  isc_stdtime_t now, dns_dbnode_t **nodep,
4079                  dns_name_t *foundname,
4080                  dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4081 {
4082         UNUSED(db);
4083         UNUSED(name);
4084         UNUSED(options);
4085         UNUSED(now);
4086         UNUSED(nodep);
4087         UNUSED(foundname);
4088         UNUSED(rdataset);
4089         UNUSED(sigrdataset);
4090
4091         FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
4092
4093         /* NOTREACHED */
4094         return (ISC_R_NOTIMPLEMENTED);
4095 }
4096
/*
 * cache_zonecut_callback() -- per-node callback that looks for a usable
 * DNAME at 'node'.
 *
 *   node   the node to examine.
 *   name   unused.
 *   arg    the rbtdb_search_t describing the lookup in progress;
 *          search->zonecut must still be NULL on entry.
 *
 * The node's rdataset headers are scanned under the node lock (read mode,
 * upgraded to write mode when possible and needed).  A header whose
 * rdh_ttl is <= search->now is stale and is never considered as a DNAME;
 * it may additionally be freed or flagged, as described in the loop.
 * Among the remaining headers, the DNAME and RRSIG(DNAME) headers for
 * which EXISTS() holds are remembered.
 *
 * Returns DNS_R_PARTIALMATCH, after recording the node and headers in
 * 'search' and taking a node reference, when a DNAME was found whose
 * trust is not pending or when DNS_DBFIND_PENDINGOK is set.  Returns
 * DNS_R_CONTINUE otherwise.
 * NOTE(review): presumably DNS_R_PARTIALMATCH makes the tree search stop
 * at this node -- confirm against dns_rbt_findnode().
 */
4097 static isc_result_t
4098 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
4099         rbtdb_search_t *search = arg;
4100         rdatasetheader_t *header, *header_prev, *header_next;
4101         rdatasetheader_t *dname_header, *sigdname_header;
4102         isc_result_t result;
4103         nodelock_t *lock;
4104         isc_rwlocktype_t locktype;
4105
4106         /* A zone cut must not have been recorded for this search yet. */
4107
4108         REQUIRE(search->zonecut == NULL);
4109
4110         /*
4111          * Keep compiler silent.
4112          */
4113         UNUSED(name);
4114
4115         lock = &(search->rbtdb->node_locks[node->locknum].lock);
4116         locktype = isc_rwlocktype_read;
4117         NODE_LOCK(lock, locktype);
4118
4119         /*
4120          * Look for a DNAME or RRSIG DNAME rdataset.
              *
              * header_prev tracks the last header that is still linked
              * into the list, so that a freed header can be unlinked.
4121          */
4122         dname_header = NULL;
4123         sigdname_header = NULL;
4124         header_prev = NULL;
4125         for (header = node->data; header != NULL; header = header_next) {
                     /* Saved first: 'header' may be freed below. */
4126                 header_next = header->next;
4127                 if (header->rdh_ttl <= search->now) {
4128                         /*
4129                          * This rdataset is stale.  If no one else is
4130                          * using the node, we can clean it up right
4131                          * now, otherwise we mark it as stale, and
4132                          * the node as dirty, so it will get cleaned
4133                          * up later.
4134                          */
4135                         if ((header->rdh_ttl <= search->now - RBTDB_VIRTUAL) &&
4136                             (locktype == isc_rwlocktype_write ||
4137                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4138                                 /*
4139                                  * We update the node's status only when we
4140                                  * can get write access; otherwise, we leave
4141                                  * others to this work.  Periodical cleaning
4142                                  * will eventually take the job as the last
4143                                  * resort.
4144                                  * We won't downgrade the lock, since other
4145                                  * rdatasets are probably stale, too.
4146                                  */
4147                                 locktype = isc_rwlocktype_write;
4148
4149                                 if (dns_rbtnode_refcurrent(node) == 0) {
4150                                         isc_mem_t *mctx;
4151
4152                                         /*
4153                                          * header->down can be non-NULL if the
4154                                          * refcount has just decremented to 0
4155                                          * but decrement_reference() has not
4156                                          * performed clean_cache_node(), in
4157                                          * which case we need to purge the
4158                                          * stale headers first.
4159                                          */
4160                                         mctx = search->rbtdb->common.mctx;
4161                                         clean_stale_headers(search->rbtdb,
4162                                                             mctx,
4163                                                             header);
4164                                         if (header_prev != NULL)
4165                                                 header_prev->next =
4166                                                         header->next;
4167                                         else
4168                                                 node->data = header->next;
4169                                         free_rdataset(search->rbtdb, mctx,
4170                                                       header);
4171                                 } else {
4172                                         header->attributes |=
4173                                                 RDATASET_ATTR_STALE;
4174                                         node->dirty = 1;
4175                                         header_prev = header;
4176                                 }
4177                         } else
4178                                 header_prev = header;
4179                 } else if (header->type == dns_rdatatype_dname &&
4180                            EXISTS(header)) {
4181                         dname_header = header;
4182                         header_prev = header;
4183                 } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
4184                          EXISTS(header)) {
4185                         sigdname_header = header;
4186                         header_prev = header;
4187                 } else
4188                         header_prev = header;
4189         }
4190
             /*
              * A DNAME is used only if its trust is not pending, unless
              * the caller passed DNS_DBFIND_PENDINGOK.
              */
4191         if (dname_header != NULL &&
4192             (!DNS_TRUST_PENDING(dname_header->trust) ||
4193              (search->options & DNS_DBFIND_PENDINGOK) != 0)) {
4194                 /*
4195                  * We increment the reference count on node to ensure that
4196                  * search->zonecut_rdataset will still be valid later.
4197                  */
4198                 new_reference(search->rbtdb, node);
4199                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4200                 search->zonecut = node;
4201                 search->zonecut_rdataset = dname_header;
4202                 search->zonecut_sigrdataset = sigdname_header;
4203                 search->need_cleanup = ISC_TRUE;
4204                 result = DNS_R_PARTIALMATCH;
4205         } else
4206                 result = DNS_R_CONTINUE;
4207
             /* locktype is write here if the lock was upgraded in the loop. */
4208         NODE_UNLOCK(lock, locktype);
4209
4210         return (result);
4211 }
4212
4213 static inline isc_result_t
4214 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
4215                      dns_dbnode_t **nodep, dns_name_t *foundname,
4216                      dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4217 {
4218         unsigned int i;
4219         dns_rbtnode_t *level_node;
4220         rdatasetheader_t *header, *header_prev, *header_next;
4221         rdatasetheader_t *found, *foundsig;
4222         isc_result_t result = ISC_R_NOTFOUND;
4223         dns_name_t name;
4224         dns_rbtdb_t *rbtdb;
4225         isc_boolean_t done;
4226         nodelock_t *lock;
4227         isc_rwlocktype_t locktype;
4228
4229         /*
4230          * Caller must be holding the tree lock.
4231          */
4232
4233         rbtdb = search->rbtdb;
4234         i = search->chain.level_matches;
4235         done = ISC_FALSE;
4236         do {
4237                 locktype = isc_rwlocktype_read;
4238                 lock = &rbtdb->node_locks[node->locknum].lock;
4239                 NODE_LOCK(lock, locktype);
4240
4241                 /*
4242                  * Look for NS and RRSIG NS rdatasets.
4243                  */
4244                 found = NULL;
4245                 foundsig = NULL;
4246                 header_prev = NULL;
4247                 for (header = node->data;
4248                      header != NULL;
4249                      header = header_next) {
4250                         header_next = header->next;
4251                         if (header->rdh_ttl <= search->now) {
4252                                 /*
4253                                  * This rdataset is stale.  If no one else is
4254                                  * using the node, we can clean it up right
4255                                  * now, otherwise we mark it as stale, and
4256                                  * the node as dirty, so it will get cleaned
4257                                  * up later.
4258                                  */
4259                                 if ((header->rdh_ttl <= search->now -
4260                                                     RBTDB_VIRTUAL) &&
4261                                     (locktype == isc_rwlocktype_write ||
4262                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4263                                         /*
4264                                          * We update the node's status only
4265                                          * when we can get write access.
4266                                          */
4267                                         locktype = isc_rwlocktype_write;
4268
4269                                         if (dns_rbtnode_refcurrent(node)
4270                                             == 0) {
4271                                                 isc_mem_t *m;
4272
4273                                                 m = search->rbtdb->common.mctx;
4274                                                 clean_stale_headers(
4275                                                         search->rbtdb,
4276                                                         m, header);
4277                                                 if (header_prev != NULL)
4278                                                         header_prev->next =
4279                                                                 header->next;
4280                                                 else
4281                                                         node->data =
4282                                                                 header->next;
4283                                                 free_rdataset(rbtdb, m,
4284                                                               header);
4285                                         } else {
4286                                                 header->attributes |=
4287                                                         RDATASET_ATTR_STALE;
4288                                                 node->dirty = 1;
4289                                                 header_prev = header;
4290                                         }
4291                                 } else
4292                                         header_prev = header;
4293                         } else if (EXISTS(header)) {
4294                                 /*
4295                                  * We've found an extant rdataset.  See if
4296                                  * we're interested in it.
4297                                  */
4298                                 if (header->type == dns_rdatatype_ns) {
4299                                         found = header;
4300                                         if (foundsig != NULL)
4301                                                 break;
4302                                 } else if (header->type ==
4303                                            RBTDB_RDATATYPE_SIGNS) {
4304                                         foundsig = header;
4305                                         if (found != NULL)
4306                                                 break;
4307                                 }
4308                                 header_prev = header;
4309                         } else
4310                                 header_prev = header;
4311                 }
4312
4313                 if (found != NULL) {
4314                         /*
4315                          * If we have to set foundname, we do it before
4316                          * anything else.  If we were to set foundname after
4317                          * we had set nodep or bound the rdataset, then we'd
4318                          * have to undo that work if dns_name_concatenate()
4319                          * failed.  By setting foundname first, there's
4320                          * nothing to undo if we have trouble.
4321                          */
4322                         if (foundname != NULL) {
4323                                 dns_name_init(&name, NULL);
4324                                 dns_rbt_namefromnode(node, &name);
4325                                 result = dns_name_copy(&name, foundname, NULL);
4326                                 while (result == ISC_R_SUCCESS && i > 0) {
4327                                         i--;
4328                                         level_node = search->chain.levels[i];
4329                                         dns_name_init(&name, NULL);
4330                                         dns_rbt_namefromnode(level_node,
4331                                                              &name);
4332                                         result =
4333                                                 dns_name_concatenate(foundname,
4334                                                                      &name,
4335                                                                      foundname,
4336                                                                      NULL);
4337                                 }
4338                                 if (result != ISC_R_SUCCESS) {
4339                                         *nodep = NULL;
4340                                         goto node_exit;
4341                                 }
4342                         }
4343                         result = DNS_R_DELEGATION;
4344                         if (nodep != NULL) {
4345                                 new_reference(search->rbtdb, node);
4346                                 *nodep = node;
4347                         }
4348                         bind_rdataset(search->rbtdb, node, found, search->now,
4349                                       rdataset);
4350                         if (foundsig != NULL)
4351                                 bind_rdataset(search->rbtdb, node, foundsig,
4352                                               search->now, sigrdataset);
4353                         if (need_headerupdate(found, search->now) ||
4354                             (foundsig != NULL &&
4355                              need_headerupdate(foundsig, search->now))) {
4356                                 if (locktype != isc_rwlocktype_write) {
4357                                         NODE_UNLOCK(lock, locktype);
4358                                         NODE_LOCK(lock, isc_rwlocktype_write);
4359                                         locktype = isc_rwlocktype_write;
4360                                 }
4361                                 if (need_headerupdate(found, search->now))
4362                                         update_header(search->rbtdb, found,
4363                                                       search->now);
4364                                 if (foundsig != NULL &&
4365                                     need_headerupdate(foundsig, search->now)) {
4366                                         update_header(search->rbtdb, foundsig,
4367                                                       search->now);
4368                                 }
4369                         }
4370                 }
4371
4372         node_exit:
4373                 NODE_UNLOCK(lock, locktype);
4374
4375                 if (found == NULL && i > 0) {
4376                         i--;
4377                         node = search->chain.levels[i];
4378                 } else
4379                         done = ISC_TRUE;
4380
4381         } while (!done);
4382
4383         return (result);
4384 }
4385
/*
 * Search the cache for an NSEC rdataset, starting at the node the search
 * chain currently points to.
 *
 * Each iteration fetches the chain's current node with
 * dns_rbtnodechain_current() and scans its rdataset headers under the
 * node lock:
 *
 *	- Headers with rdh_ttl <= now are skipped.  If such a header expired
 *	  at least RBTDB_VIRTUAL before 'now' and the node lock is (or can be
 *	  upgraded to) a write lock, the header is freed when the node has no
 *	  references; otherwise it is flagged RDATASET_ATTR_STALE and the
 *	  node is marked dirty.
 *	- Nonexistent headers and headers whose base type is 0 are ignored
 *	  and do not make the node count as non-empty.
 *	- Any other header marks the node as non-empty; the NSEC and
 *	  RRSIG(NSEC) headers are remembered.
 *
 * If the node turns out to be empty, the chain is moved with
 * dns_rbtnodechain_prev() and, when that returns ISC_R_SUCCESS, the next
 * node is examined in the same way.
 *
 * '*nodep' is stored through without a NULL check when an NSEC is found,
 * so 'nodep' must not be NULL.
 *
 * Returns:
 *	DNS_R_COVERINGNSEC	An NSEC rdataset was found.  'foundname' is the
 *				concatenation of the name and origin reported
 *				by dns_rbtnodechain_current(), 'rdataset' (and
 *				'sigrdataset' if an RRSIG(NSEC) header exists)
 *				is bound, and '*nodep' holds a new reference
 *				to the node.
 *	ISC_R_NOTFOUND		The first non-empty node has no NSEC rdataset.
 *	Any other value		Passed through unchanged from
 *				dns_rbtnodechain_current(),
 *				dns_rbtnodechain_prev() or
 *				dns_name_concatenate().
 */
4386 static isc_result_t
4387 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
4388                   isc_stdtime_t now, dns_name_t *foundname,
4389                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4390 {
4391         dns_rbtnode_t *node;
4392         rdatasetheader_t *header, *header_next, *header_prev;
4393         rdatasetheader_t *found, *foundsig;
4394         isc_boolean_t empty_node;
4395         isc_result_t result;
4396         dns_fixedname_t fname, forigin;
4397         dns_name_t *name, *origin;
4398         rbtdb_rdatatype_t matchtype, sigmatchtype;
4399         nodelock_t *lock;
4400         isc_rwlocktype_t locktype;
4401
             /*
              * Header type values to match: plain NSEC, and RRSIG covering
              * NSEC.
              */
4402         matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
4403         sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
4404                                              dns_rdatatype_nsec);
4405
4406         do {
4407                 node = NULL;
4408                 dns_fixedname_init(&fname);
4409                 name = dns_fixedname_name(&fname);
4410                 dns_fixedname_init(&forigin);
4411                 origin = dns_fixedname_name(&forigin);
4412                 result = dns_rbtnodechain_current(&search->chain, name,
4413                                                   origin, &node);
4414                 if (result != ISC_R_SUCCESS)
4415                         return (result);
                     /*
                      * Start with a read lock; it is upgraded to a write
                      * lock below only if stale headers need attention.
                      */
4416                 locktype = isc_rwlocktype_read;
4417                 lock = &(search->rbtdb->node_locks[node->locknum].lock);
4418                 NODE_LOCK(lock, locktype);
4419                 found = NULL;
4420                 foundsig = NULL;
4421                 empty_node = ISC_TRUE;
4422                 header_prev = NULL;
4423                 for (header = node->data;
4424                      header != NULL;
4425                      header = header_next) {
                             /*
                              * Remember the successor now: 'header' may be
                              * unlinked and freed before the loop advances.
                              */
4426                         header_next = header->next;
4427                         if (header->rdh_ttl <= now) {
4428                                 /*
4429                                  * This rdataset is stale.  If no one else is
4430                                  * using the node, we can clean it up right
4431                                  * now, otherwise we mark it as stale, and the
4432                                  * node as dirty, so it will get cleaned up
4433                                  * later.
4434                                  */
4435                                 if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4436                                     (locktype == isc_rwlocktype_write ||
4437                                      NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4438                                         /*
4439                                          * We update the node's status only
4440                                          * when we can get write access.
4441                                          */
4442                                         locktype = isc_rwlocktype_write;
4443
4444                                         if (dns_rbtnode_refcurrent(node)
4445                                             == 0) {
4446                                                 isc_mem_t *m;
4447
4448                                                 m = search->rbtdb->common.mctx;
4449                                                 clean_stale_headers(
4450                                                         search->rbtdb,
4451                                                         m, header);
                                                     /*
                                                      * Unlink 'header' from the
                                                      * node's list.  header_prev
                                                      * is left unchanged because
                                                      * it is still the last
                                                      * header kept.
                                                      */
4452                                                 if (header_prev != NULL)
4453                                                         header_prev->next =
4454                                                                 header->next;
4455                                                 else
4456                                                         node->data = header->next;
4457                                                 free_rdataset(search->rbtdb, m,
4458                                                               header);
4459                                         } else {
4460                                                 header->attributes |=
4461                                                         RDATASET_ATTR_STALE;
4462                                                 node->dirty = 1;
4463                                                 header_prev = header;
4464                                         }
4465                                 } else
4466                                         header_prev = header;
4467                                 continue;
4468                         }
                             /*
                              * Nonexistent headers and base-type-0 headers
                              * (which cache_find() treats as negative cache
                              * entries) do not make the node non-empty.
                              */
4469                         if (NONEXISTENT(header) ||
4470                             RBTDB_RDATATYPE_BASE(header->type) == 0) {
4471                                 header_prev = header;
4472                                 continue;
4473                         }
4474                         empty_node = ISC_FALSE;
4475                         if (header->type == matchtype)
4476                                 found = header;
4477                         else if (header->type == sigmatchtype)
4478                                 foundsig = header;
4479                         header_prev = header;
4480                 }
4481                 if (found != NULL) {
                             /*
                              * Build the full name first so that nothing has
                              * been bound or referenced if concatenation
                              * fails.
                              */
4482                         result = dns_name_concatenate(name, origin,
4483                                                       foundname, NULL);
4484                         if (result != ISC_R_SUCCESS)
4485                                 goto unlock_node;
4486                         bind_rdataset(search->rbtdb, node, found,
4487                                       now, rdataset);
4488                         if (foundsig != NULL)
4489                                 bind_rdataset(search->rbtdb, node, foundsig,
4490                                               now, sigrdataset);
4491                         new_reference(search->rbtdb, node);
4492                         *nodep = node;
4493                         result = DNS_R_COVERINGNSEC;
4494                 } else if (!empty_node) {
                             /*
                              * The node has live data but no NSEC: stop
                              * searching.
                              */
4495                         result = ISC_R_NOTFOUND;
4496                 } else
4497                         result = dns_rbtnodechain_prev(&search->chain, NULL,
4498                                                        NULL);
4499  unlock_node:
                     /*
                      * Release the node lock in whichever mode it is now held;
                      * it may have been upgraded to write during the scan.
                      */
4500                 NODE_UNLOCK(lock, locktype);
4501         } while (empty_node && result == ISC_R_SUCCESS);
4502         return (result);
4503 }
4504
/*
 * Look up rdatasets of 'type' at 'name' in the cache database 'db'.
 *
 * 'version' must be NULL.  If 'now' is 0 the current time is fetched with
 * isc_stdtime_get().  The tree read lock is held for the whole lookup.
 *
 * Outline:
 *	1. dns_rbt_findnode() searches the tree, with
 *	   cache_zonecut_callback() installed as the callback.
 *	2. On DNS_R_PARTIALMATCH: if DNS_DBFIND_COVERINGNSEC is set,
 *	   find_coveringnsec() is tried first.  Unless that returns
 *	   DNS_R_COVERINGNSEC, the result comes from setup_delegation() when
 *	   search.zonecut is non-NULL, or from find_deepest_zonecut()
 *	   otherwise.
 *	3. On an exact match the node's headers are scanned under the node
 *	   lock.  Expired headers are skipped (and freed or flagged stale when
 *	   write access can be obtained).  The answer, its RRSIG, a negative
 *	   cache entry, the NS rdataset and its RRSIG, and the CNAME RRSIG are
 *	   noted as they are encountered.
 *	4. A node with no extant rdatasets is handled like a partial match
 *	   via find_deepest_zonecut().  If no answer was found, or its trust
 *	   level is not permitted by 'options' (DNS_DBFIND_ADDITIONALOK,
 *	   DNS_DBFIND_GLUEOK, DNS_DBFIND_PENDINGOK), the NS rdataset at this
 *	   node is returned as a delegation, or find_deepest_zonecut() is used
 *	   if there is none.
 *
 * On the exact-match paths '*nodep' (when 'nodep' is non-NULL) receives a
 * new reference to the node.  For a successful dns_rdatatype_any lookup no
 * rdataset is bound.
 *
 * Results set directly in this function:
 *	ISC_R_SUCCESS		the requested type was found
 *	DNS_R_CNAME		a CNAME was found instead of 'type'
 *	DNS_R_NCACHENXDOMAIN	a negative cache entry flagged NXDOMAIN
 *	DNS_R_NCACHENXRRSET	any other negative cache entry
 *	DNS_R_DELEGATION	the NS rdataset at this node is returned
 * Any other value is whatever dns_rbt_findnode(), find_coveringnsec(),
 * setup_delegation() or find_deepest_zonecut() returned.
 */
4505 static isc_result_t
4506 cache_find(dns_db_t *db, dns_name_t *name, dns_dbversion_t *version,
4507            dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
4508            dns_dbnode_t **nodep, dns_name_t *foundname,
4509            dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4510 {
4511         dns_rbtnode_t *node = NULL;
4512         isc_result_t result;
4513         rbtdb_search_t search;
4514         isc_boolean_t cname_ok = ISC_TRUE;
4515         isc_boolean_t empty_node;
4516         nodelock_t *lock;
4517         isc_rwlocktype_t locktype;
4518         rdatasetheader_t *header, *header_prev, *header_next;
4519         rdatasetheader_t *found, *nsheader;
4520         rdatasetheader_t *foundsig, *nssig, *cnamesig;
4521         rdatasetheader_t *update, *updatesig;
4522         rbtdb_rdatatype_t sigtype, negtype;
4523
4524         UNUSED(version);
4525
4526         search.rbtdb = (dns_rbtdb_t *)db;
4527
4528         REQUIRE(VALID_RBTDB(search.rbtdb));
4529         REQUIRE(version == NULL);
4530
4531         if (now == 0)
4532                 isc_stdtime_get(&now);
4533
4534         search.rbtversion = NULL;
4535         search.serial = 1;
4536         search.options = options;
4537         search.copy_name = ISC_FALSE;
4538         search.need_cleanup = ISC_FALSE;
4539         search.wild = ISC_FALSE;
4540         search.zonecut = NULL;
4541         dns_fixedname_init(&search.zonecut_name);
4542         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4543         search.now = now;
4544         update = NULL;
4545         updatesig = NULL;
4546
4547         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4548
4549         /*
4550          * Search down from the root of the tree.  If, while going down, we
4551          * encounter a callback node, cache_zonecut_callback() will search the
4552          * rdatasets at the zone cut for a DNAME rdataset.
4553          */
4554         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4555                                   &search.chain, DNS_RBTFIND_EMPTYDATA,
4556                                   cache_zonecut_callback, &search);
4557
4558         if (result == DNS_R_PARTIALMATCH) {
4559                 if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
4560                         result = find_coveringnsec(&search, nodep, now,
4561                                                    foundname, rdataset,
4562                                                    sigrdataset);
4563                         if (result == DNS_R_COVERINGNSEC)
4564                                 goto tree_exit;
4565                 }
4566                 if (search.zonecut != NULL) {
4567                     result = setup_delegation(&search, nodep, foundname,
4568                                               rdataset, sigrdataset);
4569                     goto tree_exit;
4570                 } else {
                     /*
                      * Also reached by goto from the exact-match path below,
                      * after the node lock has been released.
                      */
4571                 find_ns:
4572                         result = find_deepest_zonecut(&search, node, nodep,
4573                                                       foundname, rdataset,
4574                                                       sigrdataset);
4575                         goto tree_exit;
4576                 }
4577         } else if (result != ISC_R_SUCCESS)
4578                 goto tree_exit;
4579
4580         /*
4581          * Certain DNSSEC types are not subject to CNAME matching
4582          * (RFC4035, section 2.5 and RFC3007).
4583          *
4584          * We don't check for RRSIG, because we don't store RRSIG records
4585          * directly.
4586          */
4587         if (type == dns_rdatatype_key || type == dns_rdatatype_nsec)
4588                 cname_ok = ISC_FALSE;
4589
4590         /*
4591          * We now go looking for rdata...
4592          */
4593
4594         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4595         locktype = isc_rwlocktype_read;
4596         NODE_LOCK(lock, locktype);
4597
4598         found = NULL;
4599         foundsig = NULL;
4600         sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
4601         negtype = RBTDB_RDATATYPE_VALUE(0, type);
4602         nsheader = NULL;
4603         nssig = NULL;
4604         cnamesig = NULL;
4605         empty_node = ISC_TRUE;
4606         header_prev = NULL;
4607         for (header = node->data; header != NULL; header = header_next) {
                     /*
                      * Remember the successor now: 'header' may be unlinked
                      * and freed by the stale-header cleanup below.
                      */
4608                 header_next = header->next;
4609                 if (header->rdh_ttl <= now) {
4610                         /*
4611                          * This rdataset is stale.  If no one else is using the
4612                          * node, we can clean it up right now, otherwise we
4613                          * mark it as stale, and the node as dirty, so it will
4614                          * get cleaned up later.
4615                          */
4616                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4617                             (locktype == isc_rwlocktype_write ||
4618                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4619                                 /*
4620                                  * We update the node's status only when we
4621                                  * can get write access.
4622                                  */
4623                                 locktype = isc_rwlocktype_write;
4624
4625                                 if (dns_rbtnode_refcurrent(node) == 0) {
4626                                         isc_mem_t *mctx;
4627
4628                                         mctx = search.rbtdb->common.mctx;
4629                                         clean_stale_headers(search.rbtdb, mctx,
4630                                                             header);
4631                                         if (header_prev != NULL)
4632                                                 header_prev->next =
4633                                                         header->next;
4634                                         else
4635                                                 node->data = header->next;
4636                                         free_rdataset(search.rbtdb, mctx,
4637                                                       header);
4638                                 } else {
4639                                         header->attributes |=
4640                                                 RDATASET_ATTR_STALE;
4641                                         node->dirty = 1;
4642                                         header_prev = header;
4643                                 }
4644                         } else
4645                                 header_prev = header;
4646                 } else if (EXISTS(header)) {
4647                         /*
4648                          * We now know that there is at least one active
4649                          * non-stale rdataset at this node.
4650                          */
4651                         empty_node = ISC_FALSE;
4652
4653                         /*
4654                          * If we found a type we were looking for, remember
4655                          * it.
4656                          */
4657                         if (header->type == type ||
4658                             (type == dns_rdatatype_any &&
4659                              RBTDB_RDATATYPE_BASE(header->type) != 0) ||
4660                             (cname_ok && header->type ==
4661                              dns_rdatatype_cname)) {
4662                                 /*
4663                                  * We've found the answer.
4664                                  */
4665                                 found = header;
4666                                 if (header->type == dns_rdatatype_cname &&
4667                                     cname_ok &&
4668                                     cnamesig != NULL) {
                                             /*
                                              * The CNAME RRSIG has already been
                                              * seen, so use it.  cnamesig is
                                              * known to be non-NULL here, so no
                                              * further test is needed and
                                              * sigtype is left unchanged.
                                              *
                                              * NOTE(review): a CNAME RRSIG that
                                              * appears later in the list than
                                              * the CNAME is stored in cnamesig
                                              * but never copied to foundsig --
                                              * confirm that header ordering
                                              * makes this case unreachable.
                                              */
4679                                         foundsig = cnamesig;
4680                                 }
4681                         } else if (header->type == sigtype) {
4682                                 /*
4683                                  * We've found the RRSIG rdataset for our
4684                                  * target type.  Remember it.
4685                                  */
4686                                 foundsig = header;
4687                         } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
4688                                    header->type == negtype) {
4689                                 /*
4690                                  * We've found a negative cache entry.
4691                                  */
4692                                 found = header;
4693                         } else if (header->type == dns_rdatatype_ns) {
4694                                 /*
4695                                  * Remember a NS rdataset even if we're
4696                                  * not specifically looking for it, because
4697                                  * we might need it later.
4698                                  */
4699                                 nsheader = header;
4700                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4701                                 /*
4702                                  * If we need the NS rdataset, we'll also
4703                                  * need its signature.
4704                                  */
4705                                 nssig = header;
4706                         } else if (cname_ok &&
4707                                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
4708                                 /*
4709                                  * If we get a CNAME match, we'll also need
4710                                  * its signature.
4711                                  */
4712                                 cnamesig = header;
4713                         }
4714                         header_prev = header;
4715                 } else
4716                         header_prev = header;
4717         }
4718
4719         if (empty_node) {
4720                 /*
4721                  * We have an exact match for the name, but there are no
4722                  * extant rdatasets.  That means that this node doesn't
4723                  * meaningfully exist, and that we really have a partial match.
4724                  */
4725                 NODE_UNLOCK(lock, locktype);
4726                 goto find_ns;
4727         }
4728
4729         /*
4730          * If we didn't find what we were looking for...
4731          */
             /*
              * ...or we found it, but at a trust level the caller did not
              * allow through 'options'.
              */
4732         if (found == NULL ||
4733             (DNS_TRUST_ADDITIONAL(found->trust) &&
4734              ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
4735             (found->trust == dns_trust_glue &&
4736              ((options & DNS_DBFIND_GLUEOK) == 0)) ||
4737             (DNS_TRUST_PENDING(found->trust) &&
4738              ((options & DNS_DBFIND_PENDINGOK) == 0))) {
4739                 /*
4740                  * If there is an NS rdataset at this node, then this is the
4741                  * deepest zone cut.
4742                  */
4743                 if (nsheader != NULL) {
4744                         if (nodep != NULL) {
4745                                 new_reference(search.rbtdb, node);
4746                                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4747                                 *nodep = node;
4748                         }
4749                         bind_rdataset(search.rbtdb, node, nsheader, search.now,
4750                                       rdataset);
4751                         if (need_headerupdate(nsheader, search.now))
4752                                 update = nsheader;
4753                         if (nssig != NULL) {
4754                                 bind_rdataset(search.rbtdb, node, nssig,
4755                                               search.now, sigrdataset);
4756                                 if (need_headerupdate(nssig, search.now))
4757                                         updatesig = nssig;
4758                         }
4759                         result = DNS_R_DELEGATION;
4760                         goto node_exit;
4761                 }
4762
4763                 /*
4764                  * Go find the deepest zone cut.
4765                  */
4766                 NODE_UNLOCK(lock, locktype);
4767                 goto find_ns;
4768         }
4769
4770         /*
4771          * We found what we were looking for, or we found a CNAME.
4772          */
4773
4774         if (nodep != NULL) {
4775                 new_reference(search.rbtdb, node);
4776                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4777                 *nodep = node;
4778         }
4779
4780         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
4781                 /*
4782                  * We found a negative cache entry.
4783                  */
4784                 if (NXDOMAIN(found))
4785                         result = DNS_R_NCACHENXDOMAIN;
4786                 else
4787                         result = DNS_R_NCACHENXRRSET;
4788         } else if (type != found->type &&
4789                    type != dns_rdatatype_any &&
4790                    found->type == dns_rdatatype_cname) {
4791                 /*
4792                  * We weren't doing an ANY query and we found a CNAME instead
4793                  * of the type we were looking for, so we need to indicate
4794                  * that result to the caller.
4795                  */
4796                 result = DNS_R_CNAME;
4797         } else {
4798                 /*
4799                  * An ordinary successful query!
4800                  */
4801                 result = ISC_R_SUCCESS;
4802         }
4803
             /*
              * Bind the answer unless this is a successful ANY lookup, in
              * which case only the node (via nodep) is handed back.
              */
4804         if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
4805             result == DNS_R_NCACHENXRRSET) {
4806                 bind_rdataset(search.rbtdb, node, found, search.now,
4807                               rdataset);
4808                 if (need_headerupdate(found, search.now))
4809                         update = found;
4810                 if (foundsig != NULL) {
4811                         bind_rdataset(search.rbtdb, node, foundsig, search.now,
4812                                       sigrdataset);
4813                         if (need_headerupdate(foundsig, search.now))
4814                                 updatesig = foundsig;
4815                 }
4816         }
4817
4818  node_exit:
             /*
              * update_header() is only called with the node write lock held.
              * If we still hold a read lock, drop it and take the write lock;
              * need_headerupdate() is then re-evaluated because the lock was
              * released in between.
              */
4819         if ((update != NULL || updatesig != NULL) &&
4820             locktype != isc_rwlocktype_write) {
4821                 NODE_UNLOCK(lock, locktype);
4822                 NODE_LOCK(lock, isc_rwlocktype_write);
4823                 locktype = isc_rwlocktype_write;
4824         }
4825         if (update != NULL && need_headerupdate(update, search.now))
4826                 update_header(search.rbtdb, update, search.now);
4827         if (updatesig != NULL && need_headerupdate(updatesig, search.now))
4828                 update_header(search.rbtdb, updatesig, search.now);
4829
4830         NODE_UNLOCK(lock, locktype);
4831
4832  tree_exit:
4833         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4834
4835         /*
4836          * If we found a zonecut but aren't going to use it, we have to
4837          * let go of it.
4838          */
4839         if (search.need_cleanup) {
4840                 node = search.zonecut;
4841                 lock = &(search.rbtdb->node_locks[node->locknum].lock);
4842
4843                 NODE_LOCK(lock, isc_rwlocktype_read);
4844                 decrement_reference(search.rbtdb, node, 0,
4845                                     isc_rwlocktype_read, isc_rwlocktype_none,
4846                                     ISC_FALSE);
4847                 NODE_UNLOCK(lock, isc_rwlocktype_read);
4848         }
4849
4850         dns_rbtnodechain_reset(&search.chain);
4851
4852         return (result);
4853 }
4854
4855 static isc_result_t
4856 cache_findzonecut(dns_db_t *db, dns_name_t *name, unsigned int options,
4857                   isc_stdtime_t now, dns_dbnode_t **nodep,
4858                   dns_name_t *foundname,
4859                   dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset)
4860 {
4861         dns_rbtnode_t *node = NULL;
4862         nodelock_t *lock;
4863         isc_result_t result;
4864         rbtdb_search_t search;
4865         rdatasetheader_t *header, *header_prev, *header_next;
4866         rdatasetheader_t *found, *foundsig;
4867         unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
4868         isc_rwlocktype_t locktype;
4869
4870         search.rbtdb = (dns_rbtdb_t *)db;
4871
4872         REQUIRE(VALID_RBTDB(search.rbtdb));
4873
4874         if (now == 0)
4875                 isc_stdtime_get(&now);
4876
4877         search.rbtversion = NULL;
4878         search.serial = 1;
4879         search.options = options;
4880         search.copy_name = ISC_FALSE;
4881         search.need_cleanup = ISC_FALSE;
4882         search.wild = ISC_FALSE;
4883         search.zonecut = NULL;
4884         dns_fixedname_init(&search.zonecut_name);
4885         dns_rbtnodechain_init(&search.chain, search.rbtdb->common.mctx);
4886         search.now = now;
4887
4888         if ((options & DNS_DBFIND_NOEXACT) != 0)
4889                 rbtoptions |= DNS_RBTFIND_NOEXACT;
4890
4891         RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
4892
4893         /*
4894          * Search down from the root of the tree.
4895          */
4896         result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
4897                                   &search.chain, rbtoptions, NULL, &search);
4898
4899         if (result == DNS_R_PARTIALMATCH) {
4900         find_ns:
4901                 result = find_deepest_zonecut(&search, node, nodep, foundname,
4902                                               rdataset, sigrdataset);
4903                 goto tree_exit;
4904         } else if (result != ISC_R_SUCCESS)
4905                 goto tree_exit;
4906
4907         /*
4908          * We now go looking for an NS rdataset at the node.
4909          */
4910
4911         lock = &(search.rbtdb->node_locks[node->locknum].lock);
4912         locktype = isc_rwlocktype_read;
4913         NODE_LOCK(lock, locktype);
4914
4915         found = NULL;
4916         foundsig = NULL;
4917         header_prev = NULL;
4918         for (header = node->data; header != NULL; header = header_next) {
4919                 header_next = header->next;
4920                 if (header->rdh_ttl <= now) {
4921                         /*
4922                          * This rdataset is stale.  If no one else is using the
4923                          * node, we can clean it up right now, otherwise we
4924                          * mark it as stale, and the node as dirty, so it will
4925                          * get cleaned up later.
4926                          */
4927                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
4928                             (locktype == isc_rwlocktype_write ||
4929                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
4930                                 /*
4931                                  * We update the node's status only when we
4932                                  * can get write access.
4933                                  */
4934                                 locktype = isc_rwlocktype_write;
4935
4936                                 if (dns_rbtnode_refcurrent(node) == 0) {
4937                                         isc_mem_t *mctx;
4938
4939                                         mctx = search.rbtdb->common.mctx;
4940                                         clean_stale_headers(search.rbtdb, mctx,
4941                                                             header);
4942                                         if (header_prev != NULL)
4943                                                 header_prev->next =
4944                                                         header->next;
4945                                         else
4946                                                 node->data = header->next;
4947                                         free_rdataset(search.rbtdb, mctx,
4948                                                       header);
4949                                 } else {
4950                                         header->attributes |=
4951                                                 RDATASET_ATTR_STALE;
4952                                         node->dirty = 1;
4953                                         header_prev = header;
4954                                 }
4955                         } else
4956                                 header_prev = header;
4957                 } else if (EXISTS(header)) {
4958                         /*
4959                          * If we found a type we were looking for, remember
4960                          * it.
4961                          */
4962                         if (header->type == dns_rdatatype_ns) {
4963                                 /*
4964                                  * Remember a NS rdataset even if we're
4965                                  * not specifically looking for it, because
4966                                  * we might need it later.
4967                                  */
4968                                 found = header;
4969                         } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
4970                                 /*
4971                                  * If we need the NS rdataset, we'll also
4972                                  * need its signature.
4973                                  */
4974                                 foundsig = header;
4975                         }
4976                         header_prev = header;
4977                 } else
4978                         header_prev = header;
4979         }
4980
4981         if (found == NULL) {
4982                 /*
4983                  * No NS records here.
4984                  */
4985                 NODE_UNLOCK(lock, locktype);
4986                 goto find_ns;
4987         }
4988
4989         if (nodep != NULL) {
4990                 new_reference(search.rbtdb, node);
4991                 INSIST(!ISC_LINK_LINKED(node, deadlink));
4992                 *nodep = node;
4993         }
4994
4995         bind_rdataset(search.rbtdb, node, found, search.now, rdataset);
4996         if (foundsig != NULL)
4997                 bind_rdataset(search.rbtdb, node, foundsig, search.now,
4998                               sigrdataset);
4999
5000         if (need_headerupdate(found, search.now) ||
5001             (foundsig != NULL &&  need_headerupdate(foundsig, search.now))) {
5002                 if (locktype != isc_rwlocktype_write) {
5003                         NODE_UNLOCK(lock, locktype);
5004                         NODE_LOCK(lock, isc_rwlocktype_write);
5005                         locktype = isc_rwlocktype_write;
5006                 }
5007                 if (need_headerupdate(found, search.now))
5008                         update_header(search.rbtdb, found, search.now);
5009                 if (foundsig != NULL &&
5010                     need_headerupdate(foundsig, search.now)) {
5011                         update_header(search.rbtdb, foundsig, search.now);
5012                 }
5013         }
5014
5015         NODE_UNLOCK(lock, locktype);
5016
5017  tree_exit:
5018         RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
5019
5020         INSIST(!search.need_cleanup);
5021
5022         dns_rbtnodechain_reset(&search.chain);
5023
5024         if (result == DNS_R_DELEGATION)
5025                 result = ISC_R_SUCCESS;
5026
5027         return (result);
5028 }
5029
5030 static void
5031 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
5032         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5033         dns_rbtnode_t *node = (dns_rbtnode_t *)source;
5034         unsigned int refs;
5035
5036         REQUIRE(VALID_RBTDB(rbtdb));
5037         REQUIRE(targetp != NULL && *targetp == NULL);
5038
5039         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
5040         dns_rbtnode_refincrement(node, &refs);
5041         INSIST(refs != 0);
5042         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
5043
5044         *targetp = source;
5045 }
5046
5047 static void
5048 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
5049         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5050         dns_rbtnode_t *node;
5051         isc_boolean_t want_free = ISC_FALSE;
5052         isc_boolean_t inactive = ISC_FALSE;
5053         rbtdb_nodelock_t *nodelock;
5054
5055         REQUIRE(VALID_RBTDB(rbtdb));
5056         REQUIRE(targetp != NULL && *targetp != NULL);
5057
5058         node = (dns_rbtnode_t *)(*targetp);
5059         nodelock = &rbtdb->node_locks[node->locknum];
5060
5061         NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
5062
5063         if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
5064                                 isc_rwlocktype_none, ISC_FALSE)) {
5065                 if (isc_refcount_current(&nodelock->references) == 0 &&
5066                     nodelock->exiting) {
5067                         inactive = ISC_TRUE;
5068                 }
5069         }
5070
5071         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
5072
5073         *targetp = NULL;
5074
5075         if (inactive) {
5076                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
5077                 rbtdb->active--;
5078                 if (rbtdb->active == 0)
5079                         want_free = ISC_TRUE;
5080                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
5081                 if (want_free) {
5082                         char buf[DNS_NAME_FORMATSIZE];
5083                         if (dns_name_dynamic(&rbtdb->common.origin))
5084                                 dns_name_format(&rbtdb->common.origin, buf,
5085                                                 sizeof(buf));
5086                         else
5087                                 strcpy(buf, "<UNKNOWN>");
5088                         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
5089                                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
5090                                       "calling free_rbtdb(%s)", buf);
5091                         free_rbtdb(rbtdb, ISC_TRUE, NULL);
5092                 }
5093         }
5094 }
5095
5096 static isc_result_t
5097 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
5098         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5099         dns_rbtnode_t *rbtnode = node;
5100         rdatasetheader_t *header;
5101         isc_boolean_t force_expire = ISC_FALSE;
5102         /*
5103          * These are the category and module used by the cache cleaner.
5104          */
5105         isc_boolean_t log = ISC_FALSE;
5106         isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
5107         isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
5108         int level = ISC_LOG_DEBUG(2);
5109         char printname[DNS_NAME_FORMATSIZE];
5110
5111         REQUIRE(VALID_RBTDB(rbtdb));
5112
5113         /*
5114          * Caller must hold a tree lock.
5115          */
5116
5117         if (now == 0)
5118                 isc_stdtime_get(&now);
5119
5120         if (rbtdb->overmem) {
5121                 isc_uint32_t val;
5122
5123                 isc_random_get(&val);
5124                 /*
5125                  * XXXDCL Could stand to have a better policy, like LRU.
5126                  */
5127                 force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
5128
5129                 /*
5130                  * Note that 'log' can be true IFF rbtdb->overmem is also true.
5131                  * rbtdb->overmem can currently only be true for cache
5132                  * databases -- hence all of the "overmem cache" log strings.
5133                  */
5134                 log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
5135                 if (log)
5136                         isc_log_write(dns_lctx, category, module, level,
5137                                       "overmem cache: %s %s",
5138                                       force_expire ? "FORCE" : "check",
5139                                       dns_rbt_formatnodename(rbtnode,
5140                                                            printname,
5141                                                            sizeof(printname)));
5142         }
5143
5144         /*
5145          * We may not need write access, but this code path is not performance
5146          * sensitive, so it should be okay to always lock as a writer.
5147          */
5148         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5149                   isc_rwlocktype_write);
5150
5151         for (header = rbtnode->data; header != NULL; header = header->next)
5152                 if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
5153                         /*
5154                          * We don't check if refcurrent(rbtnode) == 0 and try
5155                          * to free like we do in cache_find(), because
5156                          * refcurrent(rbtnode) must be non-zero.  This is so
5157                          * because 'node' is an argument to the function.
5158                          */
5159                         header->attributes |= RDATASET_ATTR_STALE;
5160                         rbtnode->dirty = 1;
5161                         if (log)
5162                                 isc_log_write(dns_lctx, category, module,
5163                                               level, "overmem cache: stale %s",
5164                                               printname);
5165                 } else if (force_expire) {
5166                         if (! RETAIN(header)) {
5167                                 set_ttl(rbtdb, header, 0);
5168                                 header->attributes |= RDATASET_ATTR_STALE;
5169                                 rbtnode->dirty = 1;
5170                         } else if (log) {
5171                                 isc_log_write(dns_lctx, category, module,
5172                                               level, "overmem cache: "
5173                                               "reprieve by RETAIN() %s",
5174                                               printname);
5175                         }
5176                 } else if (rbtdb->overmem && log)
5177                         isc_log_write(dns_lctx, category, module, level,
5178                                       "overmem cache: saved %s", printname);
5179
5180         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5181                     isc_rwlocktype_write);
5182
5183         return (ISC_R_SUCCESS);
5184 }
5185
5186 static void
5187 overmem(dns_db_t *db, isc_boolean_t overmem) {
5188         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5189
5190         if (IS_CACHE(rbtdb))
5191                 rbtdb->overmem = overmem;
5192 }
5193
5194 static void
5195 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
5196         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5197         dns_rbtnode_t *rbtnode = node;
5198         isc_boolean_t first;
5199
5200         REQUIRE(VALID_RBTDB(rbtdb));
5201
5202         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5203                   isc_rwlocktype_read);
5204
5205         fprintf(out, "node %p, %u references, locknum = %u\n",
5206                 rbtnode, dns_rbtnode_refcurrent(rbtnode),
5207                 rbtnode->locknum);
5208         if (rbtnode->data != NULL) {
5209                 rdatasetheader_t *current, *top_next;
5210
5211                 for (current = rbtnode->data; current != NULL;
5212                      current = top_next) {
5213                         top_next = current->next;
5214                         first = ISC_TRUE;
5215                         fprintf(out, "\ttype %u", current->type);
5216                         do {
5217                                 if (!first)
5218                                         fprintf(out, "\t");
5219                                 first = ISC_FALSE;
5220                                 fprintf(out,
5221                                         "\tserial = %lu, ttl = %u, "
5222                                         "trust = %u, attributes = %u, "
5223                                         "resign = %u\n",
5224                                         (unsigned long)current->serial,
5225                                         current->rdh_ttl,
5226                                         current->trust,
5227                                         current->attributes,
5228                                         current->resign);
5229                                 current = current->down;
5230                         } while (current != NULL);
5231                 }
5232         } else
5233                 fprintf(out, "(empty)\n");
5234
5235         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5236                     isc_rwlocktype_read);
5237 }
5238
5239 static isc_result_t
5240 createiterator(dns_db_t *db, unsigned int options, dns_dbiterator_t **iteratorp)
5241 {
5242         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5243         rbtdb_dbiterator_t *rbtdbiter;
5244
5245         REQUIRE(VALID_RBTDB(rbtdb));
5246
5247         rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
5248         if (rbtdbiter == NULL)
5249                 return (ISC_R_NOMEMORY);
5250
5251         rbtdbiter->common.methods = &dbiterator_methods;
5252         rbtdbiter->common.db = NULL;
5253         dns_db_attach(db, &rbtdbiter->common.db);
5254         rbtdbiter->common.relative_names =
5255                         ISC_TF((options & DNS_DB_RELATIVENAMES) != 0);
5256         rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
5257         rbtdbiter->common.cleaning = ISC_FALSE;
5258         rbtdbiter->paused = ISC_TRUE;
5259         rbtdbiter->tree_locked = isc_rwlocktype_none;
5260         rbtdbiter->result = ISC_R_SUCCESS;
5261         dns_fixedname_init(&rbtdbiter->name);
5262         dns_fixedname_init(&rbtdbiter->origin);
5263         rbtdbiter->node = NULL;
5264         rbtdbiter->delete = 0;
5265         rbtdbiter->nsec3only = ISC_TF((options & DNS_DB_NSEC3ONLY) != 0);
5266         rbtdbiter->nonsec3 = ISC_TF((options & DNS_DB_NONSEC3) != 0);
5267         memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
5268         dns_rbtnodechain_init(&rbtdbiter->chain, db->mctx);
5269         dns_rbtnodechain_init(&rbtdbiter->nsec3chain, db->mctx);
5270         if (rbtdbiter->nsec3only)
5271                 rbtdbiter->current = &rbtdbiter->nsec3chain;
5272         else
5273                 rbtdbiter->current = &rbtdbiter->chain;
5274
5275         *iteratorp = (dns_dbiterator_t *)rbtdbiter;
5276
5277         return (ISC_R_SUCCESS);
5278 }
5279
5280 static isc_result_t
5281 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5282                   dns_rdatatype_t type, dns_rdatatype_t covers,
5283                   isc_stdtime_t now, dns_rdataset_t *rdataset,
5284                   dns_rdataset_t *sigrdataset)
5285 {
5286         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5287         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5288         rdatasetheader_t *header, *header_next, *found, *foundsig;
5289         rbtdb_serial_t serial;
5290         rbtdb_version_t *rbtversion = version;
5291         isc_boolean_t close_version = ISC_FALSE;
5292         rbtdb_rdatatype_t matchtype, sigmatchtype;
5293
5294         REQUIRE(VALID_RBTDB(rbtdb));
5295         REQUIRE(type != dns_rdatatype_any);
5296
5297         if (rbtversion == NULL) {
5298                 currentversion(db, (dns_dbversion_t **) (void *)(&rbtversion));
5299                 close_version = ISC_TRUE;
5300         }
5301         serial = rbtversion->serial;
5302         now = 0;
5303
5304         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5305                   isc_rwlocktype_read);
5306
5307         found = NULL;
5308         foundsig = NULL;
5309         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5310         if (covers == 0)
5311                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5312         else
5313                 sigmatchtype = 0;
5314
5315         for (header = rbtnode->data; header != NULL; header = header_next) {
5316                 header_next = header->next;
5317                 do {
5318                         if (header->serial <= serial &&
5319                             !IGNORE(header)) {
5320                                 /*
5321                                  * Is this a "this rdataset doesn't
5322                                  * exist" record?
5323                                  */
5324                                 if (NONEXISTENT(header))
5325                                         header = NULL;
5326                                 break;
5327                         } else
5328                                 header = header->down;
5329                 } while (header != NULL);
5330                 if (header != NULL) {
5331                         /*
5332                          * We have an active, extant rdataset.  If it's a
5333                          * type we're looking for, remember it.
5334                          */
5335                         if (header->type == matchtype) {
5336                                 found = header;
5337                                 if (foundsig != NULL)
5338                                         break;
5339                         } else if (header->type == sigmatchtype) {
5340                                 foundsig = header;
5341                                 if (found != NULL)
5342                                         break;
5343                         }
5344                 }
5345         }
5346         if (found != NULL) {
5347                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5348                 if (foundsig != NULL)
5349                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5350                                       sigrdataset);
5351         }
5352
5353         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
5354                     isc_rwlocktype_read);
5355
5356         if (close_version)
5357                 closeversion(db, (dns_dbversion_t **) (void *)(&rbtversion),
5358                              ISC_FALSE);
5359
5360         if (found == NULL)
5361                 return (ISC_R_NOTFOUND);
5362
5363         return (ISC_R_SUCCESS);
5364 }
5365
5366 static isc_result_t
5367 cache_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5368                    dns_rdatatype_t type, dns_rdatatype_t covers,
5369                    isc_stdtime_t now, dns_rdataset_t *rdataset,
5370                    dns_rdataset_t *sigrdataset)
5371 {
5372         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5373         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5374         rdatasetheader_t *header, *header_next, *found, *foundsig;
5375         rbtdb_rdatatype_t matchtype, sigmatchtype, negtype;
5376         isc_result_t result;
5377         nodelock_t *lock;
5378         isc_rwlocktype_t locktype;
5379
5380         REQUIRE(VALID_RBTDB(rbtdb));
5381         REQUIRE(type != dns_rdatatype_any);
5382
5383         UNUSED(version);
5384
5385         result = ISC_R_SUCCESS;
5386
5387         if (now == 0)
5388                 isc_stdtime_get(&now);
5389
5390         lock = &rbtdb->node_locks[rbtnode->locknum].lock;
5391         locktype = isc_rwlocktype_read;
5392         NODE_LOCK(lock, locktype);
5393
5394         found = NULL;
5395         foundsig = NULL;
5396         matchtype = RBTDB_RDATATYPE_VALUE(type, covers);
5397         negtype = RBTDB_RDATATYPE_VALUE(0, type);
5398         if (covers == 0)
5399                 sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
5400         else
5401                 sigmatchtype = 0;
5402
5403         for (header = rbtnode->data; header != NULL; header = header_next) {
5404                 header_next = header->next;
5405                 if (header->rdh_ttl <= now) {
5406                         if ((header->rdh_ttl <= now - RBTDB_VIRTUAL) &&
5407                             (locktype == isc_rwlocktype_write ||
5408                              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS)) {
5409                                 /*
5410                                  * We update the node's status only when we
5411                                  * can get write access.
5412                                  */
5413                                 locktype = isc_rwlocktype_write;
5414
5415                                 /*
5416                                  * We don't check if refcurrent(rbtnode) == 0
5417                                  * and try to free like we do in cache_find(),
5418                                  * because refcurrent(rbtnode) must be
5419                                  * non-zero.  This is so because 'node' is an
5420                                  * argument to the function.
5421                                  */
5422                                 header->attributes |= RDATASET_ATTR_STALE;
5423                                 rbtnode->dirty = 1;
5424                         }
5425                 } else if (EXISTS(header)) {
5426                         if (header->type == matchtype)
5427                                 found = header;
5428                         else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
5429                                  header->type == negtype)
5430                                 found = header;
5431                         else if (header->type == sigmatchtype)
5432                                 foundsig = header;
5433                 }
5434         }
5435         if (found != NULL) {
5436                 bind_rdataset(rbtdb, rbtnode, found, now, rdataset);
5437                 if (foundsig != NULL)
5438                         bind_rdataset(rbtdb, rbtnode, foundsig, now,
5439                                       sigrdataset);
5440         }
5441
5442         NODE_UNLOCK(lock, locktype);
5443
5444         if (found == NULL)
5445                 return (ISC_R_NOTFOUND);
5446
5447         if (RBTDB_RDATATYPE_BASE(found->type) == 0) {
5448                 /*
5449                  * We found a negative cache entry.
5450                  */
5451                 if (NXDOMAIN(found))
5452                         result = DNS_R_NCACHENXDOMAIN;
5453                 else
5454                         result = DNS_R_NCACHENXRRSET;
5455         }
5456
5457         return (result);
5458 }
5459
5460 static isc_result_t
5461 allrdatasets(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
5462              isc_stdtime_t now, dns_rdatasetiter_t **iteratorp)
5463 {
5464         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
5465         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
5466         rbtdb_version_t *rbtversion = version;
5467         rbtdb_rdatasetiter_t *iterator;
5468         unsigned int refs;
5469
5470         REQUIRE(VALID_RBTDB(rbtdb));
5471
5472         iterator = isc_mem_get(rbtdb->common.mctx, sizeof(*iterator));
5473         if (iterator == NULL)
5474                 return (ISC_R_NOMEMORY);
5475
5476         if ((db->attributes & DNS_DBATTR_CACHE) == 0) {
5477                 now = 0;
5478                 if (rbtversion == NULL)
5479                         currentversion(db,
5480                                  (dns_dbversion_t **) (void *)(&rbtversion));
5481                 else {
5482                         unsigned int refs;
5483
5484                         isc_refcount_increment(&rbtversion->references,
5485                                                &refs);
5486                         INSIST(refs > 1);
5487                 }
5488         } else {
5489                 if (now == 0)
5490                         isc_stdtime_get(&now);
5491                 rbtversion = NULL;
5492         }
5493
5494         iterator->common.magic = DNS_RDATASETITER_MAGIC;
5495         iterator->common.methods = &rdatasetiter_methods;
5496         iterator->common.db = db;
5497         iterator->common.node = node;
5498         iterator->common.version = (dns_dbversion_t *)rbtversion;
5499         iterator->common.now = now;
5500
5501         NODE_STRONGLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5502
5503         dns_rbtnode_refincrement(rbtnode, &refs);
5504         INSIST(refs != 0);
5505
5506         iterator->current = NULL;
5507
5508         NODE_STRONGUNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock);
5509
5510         *iteratorp = (dns_rdatasetiter_t *)iterator;
5511
5512         return (ISC_R_SUCCESS);
5513 }
5514
5515 static isc_boolean_t
5516 cname_and_other_data(dns_rbtnode_t *node, rbtdb_serial_t serial) {
5517         rdatasetheader_t *header, *header_next;
5518         isc_boolean_t cname, other_data;
5519         dns_rdatatype_t rdtype;
5520
5521         /*
5522          * The caller must hold the node lock.
5523          */
5524
5525         /*
5526          * Look for CNAME and "other data" rdatasets active in our version.
5527          */
5528         cname = ISC_FALSE;
5529         other_data = ISC_FALSE;
5530         for (header = node->data; header != NULL; header = header_next) {
5531                 header_next = header->next;
5532                 if (header->type == dns_rdatatype_cname) {
5533                         /*
5534                          * Look for an active extant CNAME.
5535                          */
5536                         do {
5537                                 if (header->serial <= serial &&
5538                                     !IGNORE(header)) {
5539                                         /*
5540                                          * Is this a "this rdataset doesn't
5541                                          * exist" record?
5542                                          */
5543                                         if (NONEXISTENT(header))
5544                                                 header = NULL;
5545                                         break;
5546                                 } else
5547                                         header = header->down;
5548                         } while (header != NULL);
5549                         if (header != NULL)
5550                                 cname = ISC_TRUE;
5551                 } else {
5552                         /*
5553                          * Look for active extant "other data".
5554                          *
5555                          * "Other data" is any rdataset whose type is not
5556                          * KEY, NSEC, SIG or RRSIG.
5557                          */
5558                         rdtype = RBTDB_RDATATYPE_BASE(header->type);
5559                         if (rdtype != dns_rdatatype_key &&
5560                             rdtype != dns_rdatatype_sig &&
5561                             rdtype != dns_rdatatype_nsec &&
5562                             rdtype != dns_rdatatype_rrsig) {
5563                                 /*
5564                                  * Is it active and extant?
5565                                  */
5566                                 do {
5567                                         if (header->serial <= serial &&
5568                                             !IGNORE(header)) {
5569                                                 /*
5570                                                  * Is this a "this rdataset
5571                                                  * doesn't exist" record?
5572                                                  */
5573                                                 if (NONEXISTENT(header))
5574                                                         header = NULL;
5575                                                 break;
5576                                         } else
5577                                                 header = header->down;
5578                                 } while (header != NULL);
5579                                 if (header != NULL)
5580                                         other_data = ISC_TRUE;
5581                         }
5582                 }
5583         }
5584
5585         if (cname && other_data)
5586                 return (ISC_TRUE);
5587
5588         return (ISC_FALSE);
5589 }
5590
5591 static isc_result_t
5592 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader) {
5593         isc_result_t result;
5594
5595         INSIST(!IS_CACHE(rbtdb));
5596         INSIST(newheader->heap_index == 0);
5597         INSIST(!ISC_LINK_LINKED(newheader, link));
5598
5599         result = isc_heap_insert(rbtdb->heaps[idx], newheader);
5600         return (result);
5601 }
5602
5603 static isc_result_t
5604 add(dns_rbtdb_t *rbtdb, dns_rbtnode_t *rbtnode, rbtdb_version_t *rbtversion,
5605     rdatasetheader_t *newheader, unsigned int options, isc_boolean_t loading,
5606     dns_rdataset_t *addedrdataset, isc_stdtime_t now)
5607 {
5608         rbtdb_changed_t *changed = NULL;
5609         rdatasetheader_t *topheader, *topheader_prev, *header;
5610         unsigned char *merged;
5611         isc_result_t result;
5612         isc_boolean_t header_nx;
5613         isc_boolean_t newheader_nx;
5614         isc_boolean_t merge;
5615         dns_rdatatype_t rdtype, covers;
5616         rbtdb_rdatatype_t negtype;
5617         dns_trust_t trust;
5618         int idx;
5619
5620         /*
5621          * Add an rdatasetheader_t to a node.
5622          */
5623
5624         /*
5625          * Caller must be holding the node lock.
5626          */
5627
5628         if ((options & DNS_DBADD_MERGE) != 0) {
5629                 REQUIRE(rbtversion != NULL);
5630                 merge = ISC_TRUE;
5631         } else
5632                 merge = ISC_FALSE;
5633
5634         if ((options & DNS_DBADD_FORCE) != 0)
5635                 trust = dns_trust_ultimate;
5636         else
5637                 trust = newheader->trust;
5638
5639         if (rbtversion != NULL && !loading) {
5640                 /*
5641                  * We always add a changed record, even if no changes end up
5642                  * being made to this node, because it's harmless and
5643                  * simplifies the code.
5644                  */
5645                 changed = add_changed(rbtdb, rbtversion, rbtnode);
5646                 if (changed == NULL) {
5647                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5648                         return (ISC_R_NOMEMORY);
5649                 }
5650         }
5651
5652         newheader_nx = NONEXISTENT(newheader) ? ISC_TRUE : ISC_FALSE;
5653         topheader_prev = NULL;
5654
5655         negtype = 0;
5656         if (rbtversion == NULL && !newheader_nx) {
5657                 rdtype = RBTDB_RDATATYPE_BASE(newheader->type);
5658                 if (rdtype == 0) {
5659                         /*
5660                          * We're adding a negative cache entry.
5661                          */
5662                         covers = RBTDB_RDATATYPE_EXT(newheader->type);
5663                         if (covers == dns_rdatatype_any) {
5664                                 /*
5665                                  * We're adding an negative cache entry
5666                                  * which covers all types (NXDOMAIN,
5667                                  * NODATA(QTYPE=ANY)).
5668                                  *
5669                                  * We make all other data stale so that the
5670                                  * only rdataset that can be found at this
5671                                  * node is the negative cache entry.
5672                                  */
5673                                 for (topheader = rbtnode->data;
5674                                      topheader != NULL;
5675                                      topheader = topheader->next) {
5676                                         set_ttl(rbtdb, topheader, 0);
5677                                         topheader->attributes |=
5678                                                 RDATASET_ATTR_STALE;
5679                                 }
5680                                 rbtnode->dirty = 1;
5681                                 goto find_header;
5682                         }
5683                         negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
5684                 } else {
5685                         /*
5686                          * We're adding something that isn't a
5687                          * negative cache entry.  Look for an extant
5688                          * non-stale NXDOMAIN/NODATA(QTYPE=ANY) negative
5689                          * cache entry.
5690                          */
5691                         for (topheader = rbtnode->data;
5692                              topheader != NULL;
5693                              topheader = topheader->next) {
5694                                 if (topheader->type ==
5695                                     RBTDB_RDATATYPE_NCACHEANY)
5696                                         break;
5697                         }
5698                         if (topheader != NULL && EXISTS(topheader) &&
5699                             topheader->rdh_ttl > now) {
5700                                 /*
5701                                  * Found one.
5702                                  */
5703                                 if (trust < topheader->trust) {
5704                                         /*
5705                                          * The NXDOMAIN/NODATA(QTYPE=ANY)
5706                                          * is more trusted.
5707                                          */
5708                                         free_rdataset(rbtdb,
5709                                                       rbtdb->common.mctx,
5710                                                       newheader);
5711                                         if (addedrdataset != NULL)
5712                                                 bind_rdataset(rbtdb, rbtnode,
5713                                                               topheader, now,
5714                                                               addedrdataset);
5715                                         return (DNS_R_UNCHANGED);
5716                                 }
5717                                 /*
5718                                  * The new rdataset is better.  Expire the
5719                                  * NXDOMAIN/NODATA(QTYPE=ANY).
5720                                  */
5721                                 set_ttl(rbtdb, topheader, 0);
5722                                 topheader->attributes |= RDATASET_ATTR_STALE;
5723                                 rbtnode->dirty = 1;
5724                                 topheader = NULL;
5725                                 goto find_header;
5726                         }
5727                         negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
5728                 }
5729         }
5730
5731         for (topheader = rbtnode->data;
5732              topheader != NULL;
5733              topheader = topheader->next) {
5734                 if (topheader->type == newheader->type ||
5735                     topheader->type == negtype)
5736                         break;
5737                 topheader_prev = topheader;
5738         }
5739
5740  find_header:
5741         /*
5742          * If header isn't NULL, we've found the right type.  There may be
5743          * IGNORE rdatasets between the top of the chain and the first real
5744          * data.  We skip over them.
5745          */
5746         header = topheader;
5747         while (header != NULL && IGNORE(header))
5748                 header = header->down;
5749         if (header != NULL) {
5750                 header_nx = NONEXISTENT(header) ? ISC_TRUE : ISC_FALSE;
5751
5752                 /*
5753                  * Deleting an already non-existent rdataset has no effect.
5754                  */
5755                 if (header_nx && newheader_nx) {
5756                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5757                         return (DNS_R_UNCHANGED);
5758                 }
5759
5760                 /*
5761                  * Trying to add an rdataset with lower trust to a cache DB
5762                  * has no effect, provided that the cache data isn't stale.
5763                  */
5764                 if (rbtversion == NULL && trust < header->trust &&
5765                     (header->rdh_ttl > now || header_nx)) {
5766                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5767                         if (addedrdataset != NULL)
5768                                 bind_rdataset(rbtdb, rbtnode, header, now,
5769                                               addedrdataset);
5770                         return (DNS_R_UNCHANGED);
5771                 }
5772
5773                 /*
5774                  * Don't merge if a nonexistent rdataset is involved.
5775                  */
5776                 if (merge && (header_nx || newheader_nx))
5777                         merge = ISC_FALSE;
5778
5779                 /*
5780                  * If 'merge' is ISC_TRUE, we'll try to create a new rdataset
5781                  * that is the union of 'newheader' and 'header'.
5782                  */
5783                 if (merge) {
5784                         unsigned int flags = 0;
5785                         INSIST(rbtversion->serial >= header->serial);
5786                         merged = NULL;
5787                         result = ISC_R_SUCCESS;
5788
5789                         if ((options & DNS_DBADD_EXACT) != 0)
5790                                 flags |= DNS_RDATASLAB_EXACT;
5791                         if ((options & DNS_DBADD_EXACTTTL) != 0 &&
5792                              newheader->rdh_ttl != header->rdh_ttl)
5793                                         result = DNS_R_NOTEXACT;
5794                         else if (newheader->rdh_ttl != header->rdh_ttl)
5795                                 flags |= DNS_RDATASLAB_FORCE;
5796                         if (result == ISC_R_SUCCESS)
5797                                 result = dns_rdataslab_merge(
5798                                              (unsigned char *)header,
5799                                              (unsigned char *)newheader,
5800                                              (unsigned int)(sizeof(*newheader)),
5801                                              rbtdb->common.mctx,
5802                                              rbtdb->common.rdclass,
5803                                              (dns_rdatatype_t)header->type,
5804                                              flags, &merged);
5805                         if (result == ISC_R_SUCCESS) {
5806                                 /*
5807                                  * If 'header' has the same serial number as
5808                                  * we do, we could clean it up now if we knew
5809                                  * that our caller had no references to it.
5810                                  * We don't know this, however, so we leave it
5811                                  * alone.  It will get cleaned up when
5812                                  * clean_zone_node() runs.
5813                                  */
5814                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5815                                               newheader);
5816                                 newheader = (rdatasetheader_t *)merged;
5817                                 init_rdataset(rbtdb, newheader);
5818                                 if (loading && RESIGN(newheader) &&
5819                                     RESIGN(header) &&
5820                                     header->resign < newheader->resign)
5821                                         newheader->resign = header->resign;
5822                         } else {
5823                                 free_rdataset(rbtdb, rbtdb->common.mctx,
5824                                               newheader);
5825                                 return (result);
5826                         }
5827                 }
5828                 /*
5829                  * Don't replace existing NS, A and AAAA RRsets
5830                  * in the cache if they are already exist.  This
5831                  * prevents named being locked to old servers.
5832                  * Don't lower trust of existing record if the
5833                  * update is forced.
5834                  */
5835                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5836                     header->type == dns_rdatatype_ns &&
5837                     !header_nx && !newheader_nx &&
5838                     header->trust >= newheader->trust &&
5839                     dns_rdataslab_equalx((unsigned char *)header,
5840                                          (unsigned char *)newheader,
5841                                          (unsigned int)(sizeof(*newheader)),
5842                                          rbtdb->common.rdclass,
5843                                          (dns_rdatatype_t)header->type)) {
5844                         /*
5845                          * Honour the new ttl if it is less than the
5846                          * older one.
5847                          */
5848                         if (header->rdh_ttl > newheader->rdh_ttl)
5849                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5850                         if (header->noqname == NULL &&
5851                             newheader->noqname != NULL) {
5852                                 header->noqname = newheader->noqname;
5853                                 newheader->noqname = NULL;
5854                         }
5855                         if (header->closest == NULL &&
5856                             newheader->closest != NULL) {
5857                                 header->closest = newheader->closest;
5858                                 newheader->closest = NULL;
5859                         }
5860                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5861                         if (addedrdataset != NULL)
5862                                 bind_rdataset(rbtdb, rbtnode, header, now,
5863                                               addedrdataset);
5864                         return (ISC_R_SUCCESS);
5865                 }
5866                 if (IS_CACHE(rbtdb) && header->rdh_ttl > now &&
5867                     (header->type == dns_rdatatype_a ||
5868                      header->type == dns_rdatatype_aaaa) &&
5869                     !header_nx && !newheader_nx &&
5870                     header->trust >= newheader->trust &&
5871                     dns_rdataslab_equal((unsigned char *)header,
5872                                         (unsigned char *)newheader,
5873                                         (unsigned int)(sizeof(*newheader)))) {
5874                         /*
5875                          * Honour the new ttl if it is less than the
5876                          * older one.
5877                          */
5878                         if (header->rdh_ttl > newheader->rdh_ttl)
5879                                 set_ttl(rbtdb, header, newheader->rdh_ttl);
5880                         if (header->noqname == NULL &&
5881                             newheader->noqname != NULL) {
5882                                 header->noqname = newheader->noqname;
5883                                 newheader->noqname = NULL;
5884                         }
5885                         if (header->closest == NULL &&
5886                             newheader->closest != NULL) {
5887                                 header->closest = newheader->closest;
5888                                 newheader->closest = NULL;
5889                         }
5890                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5891                         if (addedrdataset != NULL)
5892                                 bind_rdataset(rbtdb, rbtnode, header, now,
5893                                               addedrdataset);
5894                         return (ISC_R_SUCCESS);
5895                 }
5896                 INSIST(rbtversion == NULL ||
5897                        rbtversion->serial >= topheader->serial);
5898                 if (topheader_prev != NULL)
5899                         topheader_prev->next = newheader;
5900                 else
5901                         rbtnode->data = newheader;
5902                 newheader->next = topheader->next;
5903                 if (loading) {
5904                         /*
5905                          * There are no other references to 'header' when
5906                          * loading, so we MAY clean up 'header' now.
5907                          * Since we don't generate changed records when
5908                          * loading, we MUST clean up 'header' now.
5909                          */
5910                         newheader->down = NULL;
5911                         free_rdataset(rbtdb, rbtdb->common.mctx, header);
5912                 } else {
5913                         newheader->down = topheader;
5914                         topheader->next = newheader;
5915                         rbtnode->dirty = 1;
5916                         if (changed != NULL)
5917                                 changed->dirty = ISC_TRUE;
5918                         if (rbtversion == NULL) {
5919                                 set_ttl(rbtdb, header, 0);
5920                                 header->attributes |= RDATASET_ATTR_STALE;
5921                         }
5922                         idx = newheader->node->locknum;
5923                         if (IS_CACHE(rbtdb)) {
5924                                 ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5925                                                  newheader, link);
5926                                 /*
5927                                  * XXXMLG We don't check the return value
5928                                  * here.  If it fails, we will not do TTL
5929                                  * based expiry on this node.  However, we
5930                                  * will do it on the LRU side, so memory
5931                                  * will not leak... for long.
5932                                  */
5933                                 isc_heap_insert(rbtdb->heaps[idx], newheader);
5934                         } else if (RESIGN(newheader))
5935                                 resign_insert(rbtdb, idx, newheader);
5936                 }
5937         } else {
5938                 /*
5939                  * No non-IGNORED rdatasets of the given type exist at
5940                  * this node.
5941                  */
5942
5943                 /*
5944                  * If we're trying to delete the type, don't bother.
5945                  */
5946                 if (newheader_nx) {
5947                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
5948                         return (DNS_R_UNCHANGED);
5949                 }
5950
5951                 if (topheader != NULL) {
5952                         /*
5953                          * We have an list of rdatasets of the given type,
5954                          * but they're all marked IGNORE.  We simply insert
5955                          * the new rdataset at the head of the list.
5956                          *
5957                          * Ignored rdatasets cannot occur during loading, so
5958                          * we INSIST on it.
5959                          */
5960                         INSIST(!loading);
5961                         INSIST(rbtversion == NULL ||
5962                                rbtversion->serial >= topheader->serial);
5963                         if (topheader_prev != NULL)
5964                                 topheader_prev->next = newheader;
5965                         else
5966                                 rbtnode->data = newheader;
5967                         newheader->next = topheader->next;
5968                         newheader->down = topheader;
5969                         topheader->next = newheader;
5970                         rbtnode->dirty = 1;
5971                         if (changed != NULL)
5972                                 changed->dirty = ISC_TRUE;
5973                 } else {
5974                         /*
5975                          * No rdatasets of the given type exist at the node.
5976                          */
5977                         newheader->next = rbtnode->data;
5978                         newheader->down = NULL;
5979                         rbtnode->data = newheader;
5980                 }
5981                 idx = newheader->node->locknum;
5982                 if (IS_CACHE(rbtdb)) {
5983                         ISC_LIST_PREPEND(rbtdb->rdatasets[idx],
5984                                          newheader, link);
5985                         isc_heap_insert(rbtdb->heaps[idx], newheader);
5986                 } else if (RESIGN(newheader)) {
5987                         resign_insert(rbtdb, idx, newheader);
5988                 }
5989         }
5990
5991         /*
5992          * Check if the node now contains CNAME and other data.
5993          */
5994         if (rbtversion != NULL &&
5995             cname_and_other_data(rbtnode, rbtversion->serial))
5996                 return (DNS_R_CNAMEANDOTHER);
5997
5998         if (addedrdataset != NULL)
5999                 bind_rdataset(rbtdb, rbtnode, newheader, now, addedrdataset);
6000
6001         return (ISC_R_SUCCESS);
6002 }
6003
6004 static inline isc_boolean_t
6005 delegating_type(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
6006                 rbtdb_rdatatype_t type)
6007 {
6008         if (IS_CACHE(rbtdb)) {
6009                 if (type == dns_rdatatype_dname)
6010                         return (ISC_TRUE);
6011                 else
6012                         return (ISC_FALSE);
6013         } else if (type == dns_rdatatype_dname ||
6014                    (type == dns_rdatatype_ns &&
6015                     (node != rbtdb->origin_node || IS_STUB(rbtdb))))
6016                 return (ISC_TRUE);
6017         return (ISC_FALSE);
6018 }
6019
6020 static inline isc_result_t
6021 addnoqname(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6022            dns_rdataset_t *rdataset)
6023 {
6024         struct noqname *noqname;
6025         isc_mem_t *mctx = rbtdb->common.mctx;
6026         dns_name_t name;
6027         dns_rdataset_t neg, negsig;
6028         isc_result_t result;
6029         isc_region_t r;
6030
6031         dns_name_init(&name, NULL);
6032         dns_rdataset_init(&neg);
6033         dns_rdataset_init(&negsig);
6034
6035         result = dns_rdataset_getnoqname(rdataset, &name, &neg, &negsig);
6036         RUNTIME_CHECK(result == ISC_R_SUCCESS);
6037
6038         noqname = isc_mem_get(mctx, sizeof(*noqname));
6039         if (noqname == NULL) {
6040                 result = ISC_R_NOMEMORY;
6041                 goto cleanup;
6042         }
6043         dns_name_init(&noqname->name, NULL);
6044         noqname->neg = NULL;
6045         noqname->negsig = NULL;
6046         noqname->type = neg.type;
6047         result = dns_name_dup(&name, mctx, &noqname->name);
6048         if (result != ISC_R_SUCCESS)
6049                 goto cleanup;
6050         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6051         if (result != ISC_R_SUCCESS)
6052                 goto cleanup;
6053         noqname->neg = r.base;
6054         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6055         if (result != ISC_R_SUCCESS)
6056                 goto cleanup;
6057         noqname->negsig = r.base;
6058         dns_rdataset_disassociate(&neg);
6059         dns_rdataset_disassociate(&negsig);
6060         newheader->noqname = noqname;
6061         return (ISC_R_SUCCESS);
6062
6063 cleanup:
6064         dns_rdataset_disassociate(&neg);
6065         dns_rdataset_disassociate(&negsig);
6066         free_noqname(mctx, &noqname);
6067         return(result);
6068 }
6069
6070 static inline isc_result_t
6071 addclosest(dns_rbtdb_t *rbtdb, rdatasetheader_t *newheader,
6072            dns_rdataset_t *rdataset)
6073 {
6074         struct noqname *closest;
6075         isc_mem_t *mctx = rbtdb->common.mctx;
6076         dns_name_t name;
6077         dns_rdataset_t neg, negsig;
6078         isc_result_t result;
6079         isc_region_t r;
6080
6081         dns_name_init(&name, NULL);
6082         dns_rdataset_init(&neg);
6083         dns_rdataset_init(&negsig);
6084
6085         result = dns_rdataset_getclosest(rdataset, &name, &neg, &negsig);
6086         RUNTIME_CHECK(result == ISC_R_SUCCESS);
6087
6088         closest = isc_mem_get(mctx, sizeof(*closest));
6089         if (closest == NULL) {
6090                 result = ISC_R_NOMEMORY;
6091                 goto cleanup;
6092         }
6093         dns_name_init(&closest->name, NULL);
6094         closest->neg = NULL;
6095         closest->negsig = NULL;
6096         closest->type = neg.type;
6097         result = dns_name_dup(&name, mctx, &closest->name);
6098         if (result != ISC_R_SUCCESS)
6099                 goto cleanup;
6100         result = dns_rdataslab_fromrdataset(&neg, mctx, &r, 0);
6101         if (result != ISC_R_SUCCESS)
6102                 goto cleanup;
6103         closest->neg = r.base;
6104         result = dns_rdataslab_fromrdataset(&negsig, mctx, &r, 0);
6105         if (result != ISC_R_SUCCESS)
6106                 goto cleanup;
6107         closest->negsig = r.base;
6108         dns_rdataset_disassociate(&neg);
6109         dns_rdataset_disassociate(&negsig);
6110         newheader->closest = closest;
6111         return (ISC_R_SUCCESS);
6112
6113  cleanup:
6114         dns_rdataset_disassociate(&neg);
6115         dns_rdataset_disassociate(&negsig);
6116         free_noqname(mctx, &closest);
6117         return(result);
6118 }
6119
6120 static dns_dbmethods_t zone_methods;
6121
6122 static isc_result_t
6123 addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6124             isc_stdtime_t now, dns_rdataset_t *rdataset, unsigned int options,
6125             dns_rdataset_t *addedrdataset)
6126 {
6127         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6128         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6129         rbtdb_version_t *rbtversion = version;
6130         isc_region_t region;
6131         rdatasetheader_t *newheader;
6132         rdatasetheader_t *header;
6133         isc_result_t result;
6134         isc_boolean_t delegating;
6135         isc_boolean_t newnsec;
6136         isc_boolean_t tree_locked = ISC_FALSE;
6137
6138         REQUIRE(VALID_RBTDB(rbtdb));
6139
6140         if (rbtdb->common.methods == &zone_methods)
6141                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6142                           (rdataset->type == dns_rdatatype_nsec3 ||
6143                            rdataset->covers == dns_rdatatype_nsec3)) ||
6144                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6145                            rdataset->type != dns_rdatatype_nsec3 &&
6146                            rdataset->covers != dns_rdatatype_nsec3)));
6147
6148         if (rbtversion == NULL) {
6149                 if (now == 0)
6150                         isc_stdtime_get(&now);
6151         } else
6152                 now = 0;
6153
6154         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6155                                             &region,
6156                                             sizeof(rdatasetheader_t));
6157         if (result != ISC_R_SUCCESS)
6158                 return (result);
6159
6160         newheader = (rdatasetheader_t *)region.base;
6161         init_rdataset(rbtdb, newheader);
6162         set_ttl(rbtdb, newheader, rdataset->ttl + now);
6163         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6164                                                 rdataset->covers);
6165         newheader->attributes = 0;
6166         newheader->noqname = NULL;
6167         newheader->closest = NULL;
6168         newheader->count = init_count++;
6169         newheader->trust = rdataset->trust;
6170         newheader->additional_auth = NULL;
6171         newheader->additional_glue = NULL;
6172         newheader->last_used = now;
6173         newheader->node = rbtnode;
6174         if (rbtversion != NULL) {
6175                 newheader->serial = rbtversion->serial;
6176                 now = 0;
6177
6178                 if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6179                         newheader->attributes |= RDATASET_ATTR_RESIGN;
6180                         newheader->resign = rdataset->resign;
6181                 } else
6182                         newheader->resign = 0;
6183         } else {
6184                 newheader->serial = 1;
6185                 newheader->resign = 0;
6186                 if ((rdataset->attributes & DNS_RDATASETATTR_NXDOMAIN) != 0)
6187                         newheader->attributes |= RDATASET_ATTR_NXDOMAIN;
6188                 if ((rdataset->attributes & DNS_RDATASETATTR_OPTOUT) != 0)
6189                         newheader->attributes |= RDATASET_ATTR_OPTOUT;
6190                 if ((rdataset->attributes & DNS_RDATASETATTR_NOQNAME) != 0) {
6191                         result = addnoqname(rbtdb, newheader, rdataset);
6192                         if (result != ISC_R_SUCCESS) {
6193                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6194                                               newheader);
6195                                 return (result);
6196                         }
6197                 }
6198                 if ((rdataset->attributes & DNS_RDATASETATTR_CLOSEST) != 0) {
6199                         result = addclosest(rbtdb, newheader, rdataset);
6200                         if (result != ISC_R_SUCCESS) {
6201                                 free_rdataset(rbtdb, rbtdb->common.mctx,
6202                                               newheader);
6203                                 return (result);
6204                         }
6205                 }
6206         }
6207
6208         /*
6209          * If we're adding a delegation type (e.g. NS or DNAME for a zone,
6210          * just DNAME for the cache), then we need to set the callback bit
6211          * on the node.
6212          */
6213         if (delegating_type(rbtdb, rbtnode, rdataset->type))
6214                 delegating = ISC_TRUE;
6215         else
6216                 delegating = ISC_FALSE;
6217
6218         /*
6219          * Add to the auxiliary NSEC tree if we're adding an NSEC record.
6220          */
6221         if (rbtnode->nsec != DNS_RBT_NSEC_HAS_NSEC &&
6222             rdataset->type == dns_rdatatype_nsec)
6223                 newnsec = ISC_TRUE;
6224         else
6225                 newnsec = ISC_FALSE;
6226
6227         /*
6228          * If we're adding a delegation type, adding to the auxiliary NSEC tree,
6229          * or the DB is a cache in an overmem state, hold an exclusive lock on
6230          * the tree.  In the latter case the lock does not necessarily have to
6231          * be acquired but it will help purge stale entries more effectively.
6232          */
6233         if (delegating || newnsec || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
6234                 tree_locked = ISC_TRUE;
6235                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6236         }
6237
6238         if (IS_CACHE(rbtdb) && rbtdb->overmem)
6239                 overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
6240
6241         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6242                   isc_rwlocktype_write);
6243
6244         if (rbtdb->rrsetstats != NULL) {
6245                 newheader->attributes |= RDATASET_ATTR_STATCOUNT;
6246                 update_rrsetstats(rbtdb, newheader, ISC_TRUE);
6247         }
6248
6249         if (IS_CACHE(rbtdb)) {
6250                 if (tree_locked)
6251                         cleanup_dead_nodes(rbtdb, rbtnode->locknum);
6252
6253                 header = isc_heap_element(rbtdb->heaps[rbtnode->locknum], 1);
6254                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL)
6255                         expire_header(rbtdb, header, tree_locked);
6256
6257                 /*
6258                  * If we've been holding a write lock on the tree just for
6259                  * cleaning, we can release it now.  However, we still need the
6260                  * node lock.
6261                  */
6262                 if (tree_locked && !delegating && !newnsec) {
6263                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6264                         tree_locked = ISC_FALSE;
6265                 }
6266         }
6267
6268         result = ISC_R_SUCCESS;
6269         if (newnsec) {
6270                 dns_fixedname_t fname;
6271                 dns_name_t *name;
6272                 dns_rbtnode_t *nsecnode;
6273
6274                 dns_fixedname_init(&fname);
6275                 name = dns_fixedname_name(&fname);
6276                 dns_rbt_fullnamefromnode(rbtnode, name);
6277                 nsecnode = NULL;
6278                 result = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6279                 if (result == ISC_R_SUCCESS) {
6280                         nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6281                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6282                 } else if (result == ISC_R_EXISTS) {
6283                         rbtnode->nsec = DNS_RBT_NSEC_HAS_NSEC;
6284                         result = ISC_R_SUCCESS;
6285                 }
6286         }
6287
6288         if (result == ISC_R_SUCCESS)
6289                 result = add(rbtdb, rbtnode, rbtversion, newheader, options,
6290                              ISC_FALSE, addedrdataset, now);
6291         if (result == ISC_R_SUCCESS && delegating)
6292                 rbtnode->find_callback = 1;
6293
6294         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6295                     isc_rwlocktype_write);
6296
6297         if (tree_locked)
6298                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
6299
6300         /*
6301          * Update the zone's secure status.  If version is non-NULL
6302          * this is deferred until closeversion() is called.
6303          */
6304         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6305                 iszonesecure(db, version, rbtdb->origin_node);
6306
6307         return (result);
6308 }
6309
6310 static isc_result_t
6311 subtractrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6312                  dns_rdataset_t *rdataset, unsigned int options,
6313                  dns_rdataset_t *newrdataset)
6314 {
6315         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6316         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6317         rbtdb_version_t *rbtversion = version;
6318         rdatasetheader_t *topheader, *topheader_prev, *header, *newheader;
6319         unsigned char *subresult;
6320         isc_region_t region;
6321         isc_result_t result;
6322         rbtdb_changed_t *changed;
6323
6324         REQUIRE(VALID_RBTDB(rbtdb));
6325
6326         if (rbtdb->common.methods == &zone_methods)
6327                 REQUIRE(((rbtnode->nsec == DNS_RBT_NSEC_NSEC3 &&
6328                           (rdataset->type == dns_rdatatype_nsec3 ||
6329                            rdataset->covers == dns_rdatatype_nsec3)) ||
6330                          (rbtnode->nsec != DNS_RBT_NSEC_NSEC3 &&
6331                            rdataset->type != dns_rdatatype_nsec3 &&
6332                            rdataset->covers != dns_rdatatype_nsec3)));
6333
6334         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6335                                             &region,
6336                                             sizeof(rdatasetheader_t));
6337         if (result != ISC_R_SUCCESS)
6338                 return (result);
6339         newheader = (rdatasetheader_t *)region.base;
6340         init_rdataset(rbtdb, newheader);
6341         set_ttl(rbtdb, newheader, rdataset->ttl);
6342         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6343                                                 rdataset->covers);
6344         newheader->attributes = 0;
6345         newheader->serial = rbtversion->serial;
6346         newheader->trust = 0;
6347         newheader->noqname = NULL;
6348         newheader->closest = NULL;
6349         newheader->count = init_count++;
6350         newheader->additional_auth = NULL;
6351         newheader->additional_glue = NULL;
6352         newheader->last_used = 0;
6353         newheader->node = rbtnode;
6354         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6355                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6356                 newheader->resign = rdataset->resign;
6357         } else
6358                 newheader->resign = 0;
6359
6360         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6361                   isc_rwlocktype_write);
6362
6363         changed = add_changed(rbtdb, rbtversion, rbtnode);
6364         if (changed == NULL) {
6365                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6366                 NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6367                             isc_rwlocktype_write);
6368                 return (ISC_R_NOMEMORY);
6369         }
6370
6371         topheader_prev = NULL;
6372         for (topheader = rbtnode->data;
6373              topheader != NULL;
6374              topheader = topheader->next) {
6375                 if (topheader->type == newheader->type)
6376                         break;
6377                 topheader_prev = topheader;
6378         }
6379         /*
6380          * If header isn't NULL, we've found the right type.  There may be
6381          * IGNORE rdatasets between the top of the chain and the first real
6382          * data.  We skip over them.
6383          */
6384         header = topheader;
6385         while (header != NULL && IGNORE(header))
6386                 header = header->down;
6387         if (header != NULL && EXISTS(header)) {
6388                 unsigned int flags = 0;
6389                 subresult = NULL;
6390                 result = ISC_R_SUCCESS;
6391                 if ((options & DNS_DBSUB_EXACT) != 0) {
6392                         flags |= DNS_RDATASLAB_EXACT;
6393                         if (newheader->rdh_ttl != header->rdh_ttl)
6394                                 result = DNS_R_NOTEXACT;
6395                 }
6396                 if (result == ISC_R_SUCCESS)
6397                         result = dns_rdataslab_subtract(
6398                                         (unsigned char *)header,
6399                                         (unsigned char *)newheader,
6400                                         (unsigned int)(sizeof(*newheader)),
6401                                         rbtdb->common.mctx,
6402                                         rbtdb->common.rdclass,
6403                                         (dns_rdatatype_t)header->type,
6404                                         flags, &subresult);
6405                 if (result == ISC_R_SUCCESS) {
6406                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6407                         newheader = (rdatasetheader_t *)subresult;
6408                         init_rdataset(rbtdb, newheader);
6409                         /*
6410                          * We have to set the serial since the rdataslab
6411                          * subtraction routine copies the reserved portion of
6412                          * header, not newheader.
6413                          */
6414                         newheader->serial = rbtversion->serial;
6415                         /*
6416                          * XXXJT: dns_rdataslab_subtract() copied the pointers
6417                          * to additional info.  We need to clear these fields
6418                          * to avoid having duplicated references.
6419                          */
6420                         newheader->additional_auth = NULL;
6421                         newheader->additional_glue = NULL;
6422                 } else if (result == DNS_R_NXRRSET) {
6423                         /*
6424                          * This subtraction would remove all of the rdata;
6425                          * add a nonexistent header instead.
6426                          */
6427                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6428                         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6429                         if (newheader == NULL) {
6430                                 result = ISC_R_NOMEMORY;
6431                                 goto unlock;
6432                         }
6433                         set_ttl(rbtdb, newheader, 0);
6434                         newheader->type = topheader->type;
6435                         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6436                         newheader->trust = 0;
6437                         newheader->serial = rbtversion->serial;
6438                         newheader->noqname = NULL;
6439                         newheader->closest = NULL;
6440                         newheader->count = 0;
6441                         newheader->additional_auth = NULL;
6442                         newheader->additional_glue = NULL;
6443                         newheader->node = rbtnode;
6444                         newheader->resign = 0;
6445                         newheader->last_used = 0;
6446                 } else {
6447                         free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6448                         goto unlock;
6449                 }
6450
6451                 /*
6452                  * If we're here, we want to link newheader in front of
6453                  * topheader.
6454                  */
6455                 INSIST(rbtversion->serial >= topheader->serial);
6456                 if (topheader_prev != NULL)
6457                         topheader_prev->next = newheader;
6458                 else
6459                         rbtnode->data = newheader;
6460                 newheader->next = topheader->next;
6461                 newheader->down = topheader;
6462                 topheader->next = newheader;
6463                 rbtnode->dirty = 1;
6464                 changed->dirty = ISC_TRUE;
6465         } else {
6466                 /*
6467                  * The rdataset doesn't exist, so we don't need to do anything
6468                  * to satisfy the deletion request.
6469                  */
6470                 free_rdataset(rbtdb, rbtdb->common.mctx, newheader);
6471                 if ((options & DNS_DBSUB_EXACT) != 0)
6472                         result = DNS_R_NOTEXACT;
6473                 else
6474                         result = DNS_R_UNCHANGED;
6475         }
6476
6477         if (result == ISC_R_SUCCESS && newrdataset != NULL)
6478                 bind_rdataset(rbtdb, rbtnode, newheader, 0, newrdataset);
6479
6480  unlock:
6481         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6482                     isc_rwlocktype_write);
6483
6484         /*
6485          * Update the zone's secure status.  If version is non-NULL
6486          * this is deferred until closeversion() is called.
6487          */
6488         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6489                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6490
6491         return (result);
6492 }
6493
6494 static isc_result_t
6495 deleterdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
6496                dns_rdatatype_t type, dns_rdatatype_t covers)
6497 {
6498         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6499         dns_rbtnode_t *rbtnode = (dns_rbtnode_t *)node;
6500         rbtdb_version_t *rbtversion = version;
6501         isc_result_t result;
6502         rdatasetheader_t *newheader;
6503
6504         REQUIRE(VALID_RBTDB(rbtdb));
6505
6506         if (type == dns_rdatatype_any)
6507                 return (ISC_R_NOTIMPLEMENTED);
6508         if (type == dns_rdatatype_rrsig && covers == 0)
6509                 return (ISC_R_NOTIMPLEMENTED);
6510
6511         newheader = new_rdataset(rbtdb, rbtdb->common.mctx);
6512         if (newheader == NULL)
6513                 return (ISC_R_NOMEMORY);
6514         set_ttl(rbtdb, newheader, 0);
6515         newheader->type = RBTDB_RDATATYPE_VALUE(type, covers);
6516         newheader->attributes = RDATASET_ATTR_NONEXISTENT;
6517         newheader->trust = 0;
6518         newheader->noqname = NULL;
6519         newheader->closest = NULL;
6520         newheader->additional_auth = NULL;
6521         newheader->additional_glue = NULL;
6522         if (rbtversion != NULL)
6523                 newheader->serial = rbtversion->serial;
6524         else
6525                 newheader->serial = 0;
6526         newheader->count = 0;
6527         newheader->last_used = 0;
6528         newheader->node = rbtnode;
6529
6530         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6531                   isc_rwlocktype_write);
6532
6533         result = add(rbtdb, rbtnode, rbtversion, newheader, DNS_DBADD_FORCE,
6534                      ISC_FALSE, NULL, 0);
6535
6536         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
6537                     isc_rwlocktype_write);
6538
6539         /*
6540          * Update the zone's secure status.  If version is non-NULL
6541          * this is deferred until closeversion() is called.
6542          */
6543         if (result == ISC_R_SUCCESS && version == NULL && !IS_CACHE(rbtdb))
6544                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6545
6546         return (result);
6547 }
6548
6549 /*
6550  * load a non-NSEC3 node in the main tree and optionally to the auxiliary NSEC
6551  */
6552 static isc_result_t
6553 loadnode(dns_rbtdb_t *rbtdb, dns_name_t *name, dns_rbtnode_t **nodep,
6554          isc_boolean_t hasnsec)
6555 {
6556         isc_result_t noderesult, nsecresult;
6557         dns_rbtnode_t *nsecnode;
6558
6559         noderesult = dns_rbt_addnode(rbtdb->tree, name, nodep);
6560         if (!hasnsec)
6561                 return (noderesult);
6562         if (noderesult == ISC_R_EXISTS) {
6563                 /*
6564                  * Add a node to the auxiliary NSEC tree for an old node
6565                  * just now getting an NSEC record.
6566                  */
6567                 if ((*nodep)->nsec == DNS_RBT_NSEC_HAS_NSEC)
6568                         return (noderesult);
6569         } else if (noderesult != ISC_R_SUCCESS) {
6570                 return (noderesult);
6571         }
6572
6573         /*
6574          * Build the auxiliary tree for NSECs as we go.
6575          * This tree speeds searches for closest NSECs that would otherwise
6576          * need to examine many irrelevant nodes in large TLDs.
6577          *
6578          * Add nodes to the auxiliary tree after corresponding nodes have
6579          * been added to the main tree.
6580          */
6581         nsecnode = NULL;
6582         nsecresult = dns_rbt_addnode(rbtdb->nsec, name, &nsecnode);
6583         if (nsecresult == ISC_R_SUCCESS) {
6584                 nsecnode->nsec = DNS_RBT_NSEC_NSEC;
6585                 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6586                 return (noderesult);
6587         }
6588
6589         if (nsecresult == ISC_R_EXISTS) {
6590 #if 1 /* 0 */
6591                 isc_log_write(dns_lctx,
6592                               DNS_LOGCATEGORY_DATABASE,
6593                               DNS_LOGMODULE_CACHE,
6594                               ISC_LOG_WARNING,
6595                               "addnode: NSEC node already exists");
6596 #endif
6597                 (*nodep)->nsec = DNS_RBT_NSEC_HAS_NSEC;
6598                 return (noderesult);
6599         }
6600
6601         nsecresult = dns_rbt_deletenode(rbtdb->tree, *nodep, ISC_FALSE);
6602         if (nsecresult != ISC_R_SUCCESS)
6603                 isc_log_write(dns_lctx,
6604                               DNS_LOGCATEGORY_DATABASE,
6605                               DNS_LOGMODULE_CACHE,
6606                               ISC_LOG_WARNING,
6607                               "loading_addrdataset: "
6608                               "dns_rbt_deletenode: %s after "
6609                               "dns_rbt_addnode(NSEC): %s",
6610                               isc_result_totext(nsecresult),
6611                               isc_result_totext(noderesult));
6612         return (noderesult);
6613 }
6614
6615 static isc_result_t
6616 loading_addrdataset(void *arg, dns_name_t *name, dns_rdataset_t *rdataset) {
6617         rbtdb_load_t *loadctx = arg;
6618         dns_rbtdb_t *rbtdb = loadctx->rbtdb;
6619         dns_rbtnode_t *node;
6620         isc_result_t result;
6621         isc_region_t region;
6622         rdatasetheader_t *newheader;
6623
6624         /*
6625          * This routine does no node locking.  See comments in
6626          * 'load' below for more information on loading and
6627          * locking.
6628          */
6629
6630
6631         /*
6632          * SOA records are only allowed at top of zone.
6633          */
6634         if (rdataset->type == dns_rdatatype_soa &&
6635             !IS_CACHE(rbtdb) && !dns_name_equal(name, &rbtdb->common.origin))
6636                 return (DNS_R_NOTZONETOP);
6637
6638         if (rdataset->type != dns_rdatatype_nsec3 &&
6639             rdataset->covers != dns_rdatatype_nsec3)
6640                 add_empty_wildcards(rbtdb, name);
6641
6642         if (dns_name_iswildcard(name)) {
6643                 /*
6644                  * NS record owners cannot legally be wild cards.
6645                  */
6646                 if (rdataset->type == dns_rdatatype_ns)
6647                         return (DNS_R_INVALIDNS);
6648                 /*
6649                  * NSEC3 record owners cannot legally be wild cards.
6650                  */
6651                 if (rdataset->type == dns_rdatatype_nsec3)
6652                         return (DNS_R_INVALIDNSEC3);
6653                 result = add_wildcard_magic(rbtdb, name);
6654                 if (result != ISC_R_SUCCESS)
6655                         return (result);
6656         }
6657
6658         node = NULL;
6659         if (rdataset->type == dns_rdatatype_nsec3 ||
6660             rdataset->covers == dns_rdatatype_nsec3) {
6661                 result = dns_rbt_addnode(rbtdb->nsec3, name, &node);
6662                 if (result == ISC_R_SUCCESS)
6663                         node->nsec = DNS_RBT_NSEC_NSEC3;
6664         } else if (rdataset->type == dns_rdatatype_nsec) {
6665                 result = loadnode(rbtdb, name, &node, ISC_TRUE);
6666         } else {
6667                 result = loadnode(rbtdb, name, &node, ISC_FALSE);
6668         }
6669         if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS)
6670                 return (result);
6671         if (result != ISC_R_EXISTS) {
6672                 dns_name_t foundname;
6673                 dns_name_init(&foundname, NULL);
6674                 dns_rbt_namefromnode(node, &foundname);
6675 #ifdef DNS_RBT_USEHASH
6676                 node->locknum = node->hashval % rbtdb->node_lock_count;
6677 #else
6678                 node->locknum = dns_name_hash(&foundname, ISC_TRUE) %
6679                         rbtdb->node_lock_count;
6680 #endif
6681         }
6682
6683         result = dns_rdataslab_fromrdataset(rdataset, rbtdb->common.mctx,
6684                                             &region,
6685                                             sizeof(rdatasetheader_t));
6686         if (result != ISC_R_SUCCESS)
6687                 return (result);
6688         newheader = (rdatasetheader_t *)region.base;
6689         init_rdataset(rbtdb, newheader);
6690         set_ttl(rbtdb, newheader,
6691                 rdataset->ttl + loadctx->now); /* XXX overflow check */
6692         newheader->type = RBTDB_RDATATYPE_VALUE(rdataset->type,
6693                                                 rdataset->covers);
6694         newheader->attributes = 0;
6695         newheader->trust = rdataset->trust;
6696         newheader->serial = 1;
6697         newheader->noqname = NULL;
6698         newheader->closest = NULL;
6699         newheader->count = init_count++;
6700         newheader->additional_auth = NULL;
6701         newheader->additional_glue = NULL;
6702         newheader->last_used = 0;
6703         newheader->node = node;
6704         if ((rdataset->attributes & DNS_RDATASETATTR_RESIGN) != 0) {
6705                 newheader->attributes |= RDATASET_ATTR_RESIGN;
6706                 newheader->resign = rdataset->resign;
6707         } else
6708                 newheader->resign = 0;
6709
6710         result = add(rbtdb, node, rbtdb->current_version, newheader,
6711                      DNS_DBADD_MERGE, ISC_TRUE, NULL, 0);
6712         if (result == ISC_R_SUCCESS &&
6713             delegating_type(rbtdb, node, rdataset->type))
6714                 node->find_callback = 1;
6715         else if (result == DNS_R_UNCHANGED)
6716                 result = ISC_R_SUCCESS;
6717
6718         return (result);
6719 }
6720
6721 static isc_result_t
6722 beginload(dns_db_t *db, dns_addrdatasetfunc_t *addp, dns_dbload_t **dbloadp) {
6723         rbtdb_load_t *loadctx;
6724         dns_rbtdb_t *rbtdb;
6725
6726         rbtdb = (dns_rbtdb_t *)db;
6727
6728         REQUIRE(VALID_RBTDB(rbtdb));
6729
6730         loadctx = isc_mem_get(rbtdb->common.mctx, sizeof(*loadctx));
6731         if (loadctx == NULL)
6732                 return (ISC_R_NOMEMORY);
6733
6734         loadctx->rbtdb = rbtdb;
6735         if (IS_CACHE(rbtdb))
6736                 isc_stdtime_get(&loadctx->now);
6737         else
6738                 loadctx->now = 0;
6739
6740         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6741
6742         REQUIRE((rbtdb->attributes & (RBTDB_ATTR_LOADED|RBTDB_ATTR_LOADING))
6743                 == 0);
6744         rbtdb->attributes |= RBTDB_ATTR_LOADING;
6745
6746         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6747
6748         *addp = loading_addrdataset;
6749         *dbloadp = loadctx;
6750
6751         return (ISC_R_SUCCESS);
6752 }
6753
6754 static isc_result_t
6755 endload(dns_db_t *db, dns_dbload_t **dbloadp) {
6756         rbtdb_load_t *loadctx;
6757         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6758
6759         REQUIRE(VALID_RBTDB(rbtdb));
6760         REQUIRE(dbloadp != NULL);
6761         loadctx = *dbloadp;
6762         REQUIRE(loadctx->rbtdb == rbtdb);
6763
6764         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6765
6766         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADING) != 0);
6767         REQUIRE((rbtdb->attributes & RBTDB_ATTR_LOADED) == 0);
6768
6769         rbtdb->attributes &= ~RBTDB_ATTR_LOADING;
6770         rbtdb->attributes |= RBTDB_ATTR_LOADED;
6771
6772         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6773
6774         /*
6775          * If there's a KEY rdataset at the zone origin containing a
6776          * zone key, we consider the zone secure.
6777          */
6778         if (! IS_CACHE(rbtdb))
6779                 iszonesecure(db, rbtdb->current_version, rbtdb->origin_node);
6780
6781         *dbloadp = NULL;
6782
6783         isc_mem_put(rbtdb->common.mctx, loadctx, sizeof(*loadctx));
6784
6785         return (ISC_R_SUCCESS);
6786 }
6787
6788 static isc_result_t
6789 dump(dns_db_t *db, dns_dbversion_t *version, const char *filename,
6790      dns_masterformat_t masterformat) {
6791         dns_rbtdb_t *rbtdb;
6792
6793         rbtdb = (dns_rbtdb_t *)db;
6794
6795         REQUIRE(VALID_RBTDB(rbtdb));
6796
6797 #ifdef BIND9
6798         return (dns_master_dump2(rbtdb->common.mctx, db, version,
6799                                  &dns_master_style_default,
6800                                  filename, masterformat));
6801 #else
6802         UNUSED(version);
6803         UNUSED(filename);
6804         UNUSED(masterformat);
6805
6806         return (ISC_R_NOTIMPLEMENTED);
6807 #endif /* BIND9 */
6808 }
6809
6810 static void
6811 delete_callback(void *data, void *arg) {
6812         dns_rbtdb_t *rbtdb = arg;
6813         rdatasetheader_t *current, *next;
6814         unsigned int locknum;
6815
6816         current = data;
6817         locknum = current->node->locknum;
6818         NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6819         while (current != NULL) {
6820                 next = current->next;
6821                 free_rdataset(rbtdb, rbtdb->common.mctx, current);
6822                 current = next;
6823         }
6824         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
6825 }
6826
6827 static isc_boolean_t
6828 issecure(dns_db_t *db) {
6829         dns_rbtdb_t *rbtdb;
6830         isc_boolean_t secure;
6831
6832         rbtdb = (dns_rbtdb_t *)db;
6833
6834         REQUIRE(VALID_RBTDB(rbtdb));
6835
6836         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6837         secure = ISC_TF(rbtdb->current_version->secure == dns_db_secure);
6838         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6839
6840         return (secure);
6841 }
6842
6843 static isc_boolean_t
6844 isdnssec(dns_db_t *db) {
6845         dns_rbtdb_t *rbtdb;
6846         isc_boolean_t dnssec;
6847
6848         rbtdb = (dns_rbtdb_t *)db;
6849
6850         REQUIRE(VALID_RBTDB(rbtdb));
6851
6852         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6853         dnssec = ISC_TF(rbtdb->current_version->secure != dns_db_insecure);
6854         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6855
6856         return (dnssec);
6857 }
6858
6859 static unsigned int
6860 nodecount(dns_db_t *db) {
6861         dns_rbtdb_t *rbtdb;
6862         unsigned int count;
6863
6864         rbtdb = (dns_rbtdb_t *)db;
6865
6866         REQUIRE(VALID_RBTDB(rbtdb));
6867
6868         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6869         count = dns_rbt_nodecount(rbtdb->tree);
6870         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6871
6872         return (count);
6873 }
6874
6875 static void
6876 settask(dns_db_t *db, isc_task_t *task) {
6877         dns_rbtdb_t *rbtdb;
6878
6879         rbtdb = (dns_rbtdb_t *)db;
6880
6881         REQUIRE(VALID_RBTDB(rbtdb));
6882
6883         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
6884         if (rbtdb->task != NULL)
6885                 isc_task_detach(&rbtdb->task);
6886         if (task != NULL)
6887                 isc_task_attach(task, &rbtdb->task);
6888         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
6889 }
6890
6891 static isc_boolean_t
6892 ispersistent(dns_db_t *db) {
6893         UNUSED(db);
6894         return (ISC_FALSE);
6895 }
6896
6897 static isc_result_t
6898 getoriginnode(dns_db_t *db, dns_dbnode_t **nodep) {
6899         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6900         dns_rbtnode_t *onode;
6901         isc_result_t result = ISC_R_SUCCESS;
6902
6903         REQUIRE(VALID_RBTDB(rbtdb));
6904         REQUIRE(nodep != NULL && *nodep == NULL);
6905
6906         /* Note that the access to origin_node doesn't require a DB lock */
6907         onode = (dns_rbtnode_t *)rbtdb->origin_node;
6908         if (onode != NULL) {
6909                 NODE_STRONGLOCK(&rbtdb->node_locks[onode->locknum].lock);
6910                 new_reference(rbtdb, onode);
6911                 NODE_STRONGUNLOCK(&rbtdb->node_locks[onode->locknum].lock);
6912
6913                 *nodep = rbtdb->origin_node;
6914         } else {
6915                 INSIST(IS_CACHE(rbtdb));
6916                 result = ISC_R_NOTFOUND;
6917         }
6918
6919         return (result);
6920 }
6921
6922 static isc_result_t
6923 getnsec3parameters(dns_db_t *db, dns_dbversion_t *version, dns_hash_t *hash,
6924                    isc_uint8_t *flags, isc_uint16_t *iterations,
6925                    unsigned char *salt, size_t *salt_length)
6926 {
6927         dns_rbtdb_t *rbtdb;
6928         isc_result_t result = ISC_R_NOTFOUND;
6929         rbtdb_version_t *rbtversion = version;
6930
6931         rbtdb = (dns_rbtdb_t *)db;
6932
6933         REQUIRE(VALID_RBTDB(rbtdb));
6934
6935         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6936
6937         if (rbtversion == NULL)
6938                 rbtversion = rbtdb->current_version;
6939
6940         if (rbtversion->havensec3) {
6941                 if (hash != NULL)
6942                         *hash = rbtversion->hash;
6943                 if (salt != NULL && salt_length != NULL) {
6944                         REQUIRE(*salt_length >= rbtversion->salt_length);
6945                         memcpy(salt, rbtversion->salt, rbtversion->salt_length);
6946                 }
6947                 if (salt_length != NULL)
6948                         *salt_length = rbtversion->salt_length;
6949                 if (iterations != NULL)
6950                         *iterations = rbtversion->iterations;
6951                 if (flags != NULL)
6952                         *flags = rbtversion->flags;
6953                 result = ISC_R_SUCCESS;
6954         }
6955         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
6956
6957         return (result);
6958 }
6959
6960 static isc_result_t
6961 setsigningtime(dns_db_t *db, dns_rdataset_t *rdataset, isc_stdtime_t resign) {
6962         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
6963         isc_stdtime_t oldresign;
6964         isc_result_t result = ISC_R_SUCCESS;
6965         rdatasetheader_t *header;
6966
6967         REQUIRE(VALID_RBTDB(rbtdb));
6968         REQUIRE(!IS_CACHE(rbtdb));
6969         REQUIRE(rdataset != NULL);
6970
6971         header = rdataset->private3;
6972         header--;
6973
6974         NODE_LOCK(&rbtdb->node_locks[header->node->locknum].lock,
6975                   isc_rwlocktype_write);
6976
6977         oldresign = header->resign;
6978         header->resign = resign;
6979         if (header->heap_index != 0) {
6980                 INSIST(RESIGN(header));
6981                 if (resign == 0) {
6982                         isc_heap_delete(rbtdb->heaps[header->node->locknum],
6983                                         header->heap_index);
6984                         header->heap_index = 0;
6985                 } else if (resign < oldresign)
6986                         isc_heap_increased(rbtdb->heaps[header->node->locknum],
6987                                            header->heap_index);
6988                 else if (resign > oldresign)
6989                         isc_heap_decreased(rbtdb->heaps[header->node->locknum],
6990                                            header->heap_index);
6991         } else if (resign && header->heap_index == 0) {
6992                 header->attributes |= RDATASET_ATTR_RESIGN;
6993                 result = resign_insert(rbtdb, header->node->locknum, header);
6994         }
6995         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
6996                     isc_rwlocktype_write);
6997         return (result);
6998 }
6999
7000 static isc_result_t
7001 getsigningtime(dns_db_t *db, dns_rdataset_t *rdataset,
7002                dns_name_t *foundname)
7003 {
7004         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7005         rdatasetheader_t *header = NULL, *this;
7006         unsigned int i;
7007         isc_result_t result = ISC_R_NOTFOUND;
7008         unsigned int locknum;
7009
7010         REQUIRE(VALID_RBTDB(rbtdb));
7011
7012         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
7013
7014         for (i = 0; i < rbtdb->node_lock_count; i++) {
7015                 NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_read);
7016                 this = isc_heap_element(rbtdb->heaps[i], 1);
7017                 if (this == NULL) {
7018                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7019                                     isc_rwlocktype_read);
7020                         continue;
7021                 }
7022                 if (header == NULL)
7023                         header = this;
7024                 else if (isc_serial_lt(this->resign, header->resign)) {
7025                         locknum = header->node->locknum;
7026                         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
7027                                     isc_rwlocktype_read);
7028                         header = this;
7029                 } else
7030                         NODE_UNLOCK(&rbtdb->node_locks[i].lock,
7031                                     isc_rwlocktype_read);
7032         }
7033
7034         if (header == NULL)
7035                 goto unlock;
7036
7037         bind_rdataset(rbtdb, header->node, header, 0, rdataset);
7038
7039         if (foundname != NULL)
7040                 dns_rbt_fullnamefromnode(header->node, foundname);
7041
7042         NODE_UNLOCK(&rbtdb->node_locks[header->node->locknum].lock,
7043                     isc_rwlocktype_read);
7044
7045         result = ISC_R_SUCCESS;
7046
7047  unlock:
7048         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
7049
7050         return (result);
7051 }
7052
7053 static void
7054 resigned(dns_db_t *db, dns_rdataset_t *rdataset, dns_dbversion_t *version)
7055 {
7056         rbtdb_version_t *rbtversion = (rbtdb_version_t *)version;
7057         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7058         dns_rbtnode_t *node;
7059         rdatasetheader_t *header;
7060
7061         REQUIRE(VALID_RBTDB(rbtdb));
7062         REQUIRE(rdataset != NULL);
7063         REQUIRE(rbtdb->future_version == rbtversion);
7064         REQUIRE(rbtversion->writer);
7065
7066         node = rdataset->private2;
7067         header = rdataset->private3;
7068         header--;
7069
7070         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
7071         NODE_LOCK(&rbtdb->node_locks[node->locknum].lock,
7072                   isc_rwlocktype_write);
7073         /*
7074          * Delete from heap and save to re-signed list so that it can
7075          * be restored if we backout of this change.
7076          */
7077         new_reference(rbtdb, node);
7078         isc_heap_delete(rbtdb->heaps[node->locknum], header->heap_index);
7079         header->heap_index = 0;
7080         ISC_LIST_APPEND(rbtversion->resigned_list, header, link);
7081
7082         NODE_UNLOCK(&rbtdb->node_locks[node->locknum].lock,
7083                     isc_rwlocktype_write);
7084         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
7085 }
7086
7087 static dns_stats_t *
7088 getrrsetstats(dns_db_t *db) {
7089         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
7090
7091         REQUIRE(VALID_RBTDB(rbtdb));
7092         REQUIRE(IS_CACHE(rbtdb)); /* current restriction */
7093
7094         return (rbtdb->rrsetstats);
7095 }
7096
7097 static dns_dbmethods_t zone_methods = {
7098         attach,
7099         detach,
7100         beginload,
7101         endload,
7102         dump,
7103         currentversion,
7104         newversion,
7105         attachversion,
7106         closeversion,
7107         findnode,
7108         zone_find,
7109         zone_findzonecut,
7110         attachnode,
7111         detachnode,
7112         expirenode,
7113         printnode,
7114         createiterator,
7115         zone_findrdataset,
7116         allrdatasets,
7117         addrdataset,
7118         subtractrdataset,
7119         deleterdataset,
7120         issecure,
7121         nodecount,
7122         ispersistent,
7123         overmem,
7124         settask,
7125         getoriginnode,
7126         NULL,
7127         getnsec3parameters,
7128         findnsec3node,
7129         setsigningtime,
7130         getsigningtime,
7131         resigned,
7132         isdnssec,
7133         NULL
7134 };
7135
7136 static dns_dbmethods_t cache_methods = {
7137         attach,
7138         detach,
7139         beginload,
7140         endload,
7141         dump,
7142         currentversion,
7143         newversion,
7144         attachversion,
7145         closeversion,
7146         findnode,
7147         cache_find,
7148         cache_findzonecut,
7149         attachnode,
7150         detachnode,
7151         expirenode,
7152         printnode,
7153         createiterator,
7154         cache_findrdataset,
7155         allrdatasets,
7156         addrdataset,
7157         subtractrdataset,
7158         deleterdataset,
7159         issecure,
7160         nodecount,
7161         ispersistent,
7162         overmem,
7163         settask,
7164         getoriginnode,
7165         NULL,
7166         NULL,
7167         NULL,
7168         NULL,
7169         NULL,
7170         NULL,
7171         isdnssec,
7172         getrrsetstats
7173 };
7174
7175 isc_result_t
7176 #ifdef DNS_RBTDB_VERSION64
7177 dns_rbtdb64_create
7178 #else
7179 dns_rbtdb_create
7180 #endif
7181                 (isc_mem_t *mctx, dns_name_t *origin, dns_dbtype_t type,
7182                  dns_rdataclass_t rdclass, unsigned int argc, char *argv[],
7183                  void *driverarg, dns_db_t **dbp)
7184 {
7185         dns_rbtdb_t *rbtdb;
7186         isc_result_t result;
7187         int i;
7188         dns_name_t name;
7189         isc_boolean_t (*sooner)(void *, void *);
7190
7191         /* Keep the compiler happy. */
7192         UNUSED(argc);
7193         UNUSED(argv);
7194         UNUSED(driverarg);
7195
7196         rbtdb = isc_mem_get(mctx, sizeof(*rbtdb));
7197         if (rbtdb == NULL)
7198                 return (ISC_R_NOMEMORY);
7199
7200         memset(rbtdb, '\0', sizeof(*rbtdb));
7201         dns_name_init(&rbtdb->common.origin, NULL);
7202         rbtdb->common.attributes = 0;
7203         if (type == dns_dbtype_cache) {
7204                 rbtdb->common.methods = &cache_methods;
7205                 rbtdb->common.attributes |= DNS_DBATTR_CACHE;
7206         } else if (type == dns_dbtype_stub) {
7207                 rbtdb->common.methods = &zone_methods;
7208                 rbtdb->common.attributes |= DNS_DBATTR_STUB;
7209         } else
7210                 rbtdb->common.methods = &zone_methods;
7211         rbtdb->common.rdclass = rdclass;
7212         rbtdb->common.mctx = NULL;
7213
7214         result = RBTDB_INITLOCK(&rbtdb->lock);
7215         if (result != ISC_R_SUCCESS)
7216                 goto cleanup_rbtdb;
7217
7218         result = isc_rwlock_init(&rbtdb->tree_lock, 0, 0);
7219         if (result != ISC_R_SUCCESS)
7220                 goto cleanup_lock;
7221
7222         /*
7223          * Initialize node_lock_count in a generic way to support future
7224          * extension which allows the user to specify this value on creation.
7225          * Note that when specified for a cache DB it must be larger than 1
7226          * as commented with the definition of DEFAULT_CACHE_NODE_LOCK_COUNT.
7227          */
7228         if (rbtdb->node_lock_count == 0) {
7229                 if (IS_CACHE(rbtdb))
7230                         rbtdb->node_lock_count = DEFAULT_CACHE_NODE_LOCK_COUNT;
7231                 else
7232                         rbtdb->node_lock_count = DEFAULT_NODE_LOCK_COUNT;
7233         } else if (rbtdb->node_lock_count < 2 && IS_CACHE(rbtdb)) {
7234                 result = ISC_R_RANGE;
7235                 goto cleanup_tree_lock;
7236         }
7237         INSIST(rbtdb->node_lock_count < (1 << DNS_RBT_LOCKLENGTH));
7238         rbtdb->node_locks = isc_mem_get(mctx, rbtdb->node_lock_count *
7239                                         sizeof(rbtdb_nodelock_t));
7240         if (rbtdb->node_locks == NULL) {
7241                 result = ISC_R_NOMEMORY;
7242                 goto cleanup_tree_lock;
7243         }
7244
7245         rbtdb->rrsetstats = NULL;
7246         if (IS_CACHE(rbtdb)) {
7247                 result = dns_rdatasetstats_create(mctx, &rbtdb->rrsetstats);
7248                 if (result != ISC_R_SUCCESS)
7249                         goto cleanup_node_locks;
7250                 rbtdb->rdatasets = isc_mem_get(mctx, rbtdb->node_lock_count *
7251                                                sizeof(rdatasetheaderlist_t));
7252                 if (rbtdb->rdatasets == NULL) {
7253                         result = ISC_R_NOMEMORY;
7254                         goto cleanup_rrsetstats;
7255                 }
7256                 for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7257                         ISC_LIST_INIT(rbtdb->rdatasets[i]);
7258         } else
7259                 rbtdb->rdatasets = NULL;
7260
7261         /*
7262          * Create the heaps.
7263          */
7264         rbtdb->heaps = isc_mem_get(mctx, rbtdb->node_lock_count *
7265                                    sizeof(isc_heap_t *));
7266         if (rbtdb->heaps == NULL) {
7267                 result = ISC_R_NOMEMORY;
7268                 goto cleanup_rdatasets;
7269         }
7270         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7271                 rbtdb->heaps[i] = NULL;
7272         sooner = IS_CACHE(rbtdb) ? ttl_sooner : resign_sooner;
7273         for (i = 0; i < (int)rbtdb->node_lock_count; i++) {
7274                 result = isc_heap_create(mctx, sooner, set_index, 0,
7275                                          &rbtdb->heaps[i]);
7276                 if (result != ISC_R_SUCCESS)
7277                         goto cleanup_heaps;
7278         }
7279
7280         /*
7281          * Create deadnode lists.
7282          */
7283         rbtdb->deadnodes = isc_mem_get(mctx, rbtdb->node_lock_count *
7284                                        sizeof(rbtnodelist_t));
7285         if (rbtdb->deadnodes == NULL) {
7286                 result = ISC_R_NOMEMORY;
7287                 goto cleanup_heaps;
7288         }
7289         for (i = 0; i < (int)rbtdb->node_lock_count; i++)
7290                 ISC_LIST_INIT(rbtdb->deadnodes[i]);
7291
7292         rbtdb->active = rbtdb->node_lock_count;
7293
7294         for (i = 0; i < (int)(rbtdb->node_lock_count); i++) {
7295                 result = NODE_INITLOCK(&rbtdb->node_locks[i].lock);
7296                 if (result == ISC_R_SUCCESS) {
7297                         result = isc_refcount_init(&rbtdb->node_locks[i].references, 0);
7298                         if (result != ISC_R_SUCCESS)
7299                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7300                 }
7301                 if (result != ISC_R_SUCCESS) {
7302                         while (i-- > 0) {
7303                                 NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
7304                                 isc_refcount_decrement(&rbtdb->node_locks[i].references, NULL);
7305                                 isc_refcount_destroy(&rbtdb->node_locks[i].references);
7306                         }
7307                         goto cleanup_deadnodes;
7308                 }
7309                 rbtdb->node_locks[i].exiting = ISC_FALSE;
7310         }
7311
7312         /*
7313          * Attach to the mctx.  The database will persist so long as there
7314          * are references to it, and attaching to the mctx ensures that our
7315          * mctx won't disappear out from under us.
7316          */
7317         isc_mem_attach(mctx, &rbtdb->common.mctx);
7318
7319         /*
7320          * Must be initialized before free_rbtdb() is called.
7321          */
7322         isc_ondestroy_init(&rbtdb->common.ondest);
7323
7324         /*
7325          * Make a copy of the origin name.
7326          */
7327         result = dns_name_dupwithoffsets(origin, mctx, &rbtdb->common.origin);
7328         if (result != ISC_R_SUCCESS) {
7329                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7330                 return (result);
7331         }
7332
7333         /*
7334          * Make the Red-Black Trees.
7335          */
7336         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->tree);
7337         if (result != ISC_R_SUCCESS) {
7338                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7339                 return (result);
7340         }
7341
7342         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec);
7343         if (result != ISC_R_SUCCESS) {
7344                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7345                 return (result);
7346         }
7347
7348         result = dns_rbt_create(mctx, delete_callback, rbtdb, &rbtdb->nsec3);
7349         if (result != ISC_R_SUCCESS) {
7350                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7351                 return (result);
7352         }
7353
7354         /*
7355          * In order to set the node callback bit correctly in zone databases,
7356          * we need to know if the node has the origin name of the zone.
7357          * In loading_addrdataset() we could simply compare the new name
7358          * to the origin name, but this is expensive.  Also, we don't know the
7359          * node name in addrdataset(), so we need another way of knowing the
7360          * zone's top.
7361          *
7362          * We now explicitly create a node for the zone's origin, and then
7363          * we simply remember the node's address.  This is safe, because
7364          * the top-of-zone node can never be deleted, nor can its address
7365          * change.
7366          */
7367         if (!IS_CACHE(rbtdb)) {
7368                 rbtdb->origin_node = NULL;
7369                 result = dns_rbt_addnode(rbtdb->tree, &rbtdb->common.origin,
7370                                          &rbtdb->origin_node);
7371                 if (result != ISC_R_SUCCESS) {
7372                         INSIST(result != ISC_R_EXISTS);
7373                         free_rbtdb(rbtdb, ISC_FALSE, NULL);
7374                         return (result);
7375                 }
7376                 rbtdb->origin_node->nsec = DNS_RBT_NSEC_NORMAL;
7377                 /*
7378                  * We need to give the origin node the right locknum.
7379                  */
7380                 dns_name_init(&name, NULL);
7381                 dns_rbt_namefromnode(rbtdb->origin_node, &name);
7382 #ifdef DNS_RBT_USEHASH
7383                 rbtdb->origin_node->locknum =
7384                         rbtdb->origin_node->hashval %
7385                         rbtdb->node_lock_count;
7386 #else
7387                 rbtdb->origin_node->locknum =
7388                         dns_name_hash(&name, ISC_TRUE) %
7389                         rbtdb->node_lock_count;
7390 #endif
7391         }
7392
7393         /*
7394          * Misc. Initialization.
7395          */
7396         result = isc_refcount_init(&rbtdb->references, 1);
7397         if (result != ISC_R_SUCCESS) {
7398                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7399                 return (result);
7400         }
7401         rbtdb->attributes = 0;
7402         rbtdb->overmem = ISC_FALSE;
7403         rbtdb->task = NULL;
7404
7405         /*
7406          * Version Initialization.
7407          */
7408         rbtdb->current_serial = 1;
7409         rbtdb->least_serial = 1;
7410         rbtdb->next_serial = 2;
7411         rbtdb->current_version = allocate_version(mctx, 1, 1, ISC_FALSE);
7412         if (rbtdb->current_version == NULL) {
7413                 isc_refcount_decrement(&rbtdb->references, NULL);
7414                 isc_refcount_destroy(&rbtdb->references);
7415                 free_rbtdb(rbtdb, ISC_FALSE, NULL);
7416                 return (ISC_R_NOMEMORY);
7417         }
7418         rbtdb->current_version->secure = dns_db_insecure;
7419         rbtdb->current_version->havensec3 = ISC_FALSE;
7420         rbtdb->current_version->flags = 0;
7421         rbtdb->current_version->iterations = 0;
7422         rbtdb->current_version->hash = 0;
7423         rbtdb->current_version->salt_length = 0;
7424         memset(rbtdb->current_version->salt, 0,
7425                sizeof(rbtdb->current_version->salt));
7426         rbtdb->future_version = NULL;
7427         ISC_LIST_INIT(rbtdb->open_versions);
7428         /*
7429          * Keep the current version in the open list so that list operation
7430          * won't happen in normal lookup operations.
7431          */
7432         PREPEND(rbtdb->open_versions, rbtdb->current_version, link);
7433
7434         rbtdb->common.magic = DNS_DB_MAGIC;
7435         rbtdb->common.impmagic = RBTDB_MAGIC;
7436
7437         *dbp = (dns_db_t *)rbtdb;
7438
7439         return (ISC_R_SUCCESS);
7440
7441  cleanup_deadnodes:
7442         isc_mem_put(mctx, rbtdb->deadnodes,
7443                     rbtdb->node_lock_count * sizeof(rbtnodelist_t));
7444
7445  cleanup_heaps:
7446         if (rbtdb->heaps != NULL) {
7447                 for (i = 0 ; i < (int)rbtdb->node_lock_count ; i++)
7448                         if (rbtdb->heaps[i] != NULL)
7449                                 isc_heap_destroy(&rbtdb->heaps[i]);
7450                 isc_mem_put(mctx, rbtdb->heaps,
7451                             rbtdb->node_lock_count * sizeof(isc_heap_t *));
7452         }
7453
7454  cleanup_rdatasets:
7455         if (rbtdb->rdatasets != NULL)
7456                 isc_mem_put(mctx, rbtdb->rdatasets, rbtdb->node_lock_count *
7457                             sizeof(rdatasetheaderlist_t));
7458  cleanup_rrsetstats:
7459         if (rbtdb->rrsetstats != NULL)
7460                 dns_stats_detach(&rbtdb->rrsetstats);
7461
7462  cleanup_node_locks:
7463         isc_mem_put(mctx, rbtdb->node_locks,
7464                     rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
7465
7466  cleanup_tree_lock:
7467         isc_rwlock_destroy(&rbtdb->tree_lock);
7468
7469  cleanup_lock:
7470         RBTDB_DESTROYLOCK(&rbtdb->lock);
7471
7472  cleanup_rbtdb:
7473         isc_mem_put(mctx, rbtdb,  sizeof(*rbtdb));
7474         return (result);
7475 }
7476
7477
7478 /*
7479  * Slabbed Rdataset Methods
7480  */
7481
7482 static void
7483 rdataset_disassociate(dns_rdataset_t *rdataset) {
7484         dns_db_t *db = rdataset->private1;
7485         dns_dbnode_t *node = rdataset->private2;
7486
7487         detachnode(db, &node);
7488 }
7489
7490 static isc_result_t
7491 rdataset_first(dns_rdataset_t *rdataset) {
7492         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7493         unsigned int count;
7494
7495         count = raw[0] * 256 + raw[1];
7496         if (count == 0) {
7497                 rdataset->private5 = NULL;
7498                 return (ISC_R_NOMORE);
7499         }
7500
7501 #if DNS_RDATASET_FIXED
7502         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0)
7503                 raw += 2 + (4 * count);
7504         else
7505 #endif
7506                 raw += 2;
7507
7508         /*
7509          * The privateuint4 field is the number of rdata beyond the
7510          * cursor position, so we decrement the total count by one
7511          * before storing it.
7512          *
7513          * If DNS_RDATASETATTR_LOADORDER is not set 'raw' points to the
7514          * first record.  If DNS_RDATASETATTR_LOADORDER is set 'raw' points
7515          * to the first entry in the offset table.
7516          */
7517         count--;
7518         rdataset->privateuint4 = count;
7519         rdataset->private5 = raw;
7520
7521         return (ISC_R_SUCCESS);
7522 }
7523
7524 static isc_result_t
7525 rdataset_next(dns_rdataset_t *rdataset) {
7526         unsigned int count;
7527         unsigned int length;
7528         unsigned char *raw;     /* RDATASLAB */
7529
7530         count = rdataset->privateuint4;
7531         if (count == 0)
7532                 return (ISC_R_NOMORE);
7533         count--;
7534         rdataset->privateuint4 = count;
7535
7536         /*
7537          * Skip forward one record (length + 4) or one offset (4).
7538          */
7539         raw = rdataset->private5;
7540 #if DNS_RDATASET_FIXED
7541         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) == 0) {
7542 #endif
7543                 length = raw[0] * 256 + raw[1];
7544                 raw += length;
7545 #if DNS_RDATASET_FIXED
7546         }
7547         rdataset->private5 = raw + 4;           /* length(2) + order(2) */
7548 #else
7549         rdataset->private5 = raw + 2;           /* length(2) */
7550 #endif
7551
7552         return (ISC_R_SUCCESS);
7553 }
7554
7555 static void
7556 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata) {
7557         unsigned char *raw = rdataset->private5;        /* RDATASLAB */
7558 #if DNS_RDATASET_FIXED
7559         unsigned int offset;
7560 #endif
7561         unsigned int length;
7562         isc_region_t r;
7563         unsigned int flags = 0;
7564
7565         REQUIRE(raw != NULL);
7566
7567         /*
7568          * Find the start of the record if not already in private5
7569          * then skip the length and order fields.
7570          */
7571 #if DNS_RDATASET_FIXED
7572         if ((rdataset->attributes & DNS_RDATASETATTR_LOADORDER) != 0) {
7573                 offset = (raw[0] << 24) + (raw[1] << 16) +
7574                          (raw[2] << 8) + raw[3];
7575                 raw = rdataset->private3;
7576                 raw += offset;
7577         }
7578 #endif
7579         length = raw[0] * 256 + raw[1];
7580 #if DNS_RDATASET_FIXED
7581         raw += 4;
7582 #else
7583         raw += 2;
7584 #endif
7585         if (rdataset->type == dns_rdatatype_rrsig) {
7586                 if (*raw & DNS_RDATASLAB_OFFLINE)
7587                         flags |= DNS_RDATA_OFFLINE;
7588                 length--;
7589                 raw++;
7590         }
7591         r.length = length;
7592         r.base = raw;
7593         dns_rdata_fromregion(rdata, rdataset->rdclass, rdataset->type, &r);
7594         rdata->flags |= flags;
7595 }
7596
7597 static void
7598 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target) {
7599         dns_db_t *db = source->private1;
7600         dns_dbnode_t *node = source->private2;
7601         dns_dbnode_t *cloned_node = NULL;
7602
7603         attachnode(db, node, &cloned_node);
7604         *target = *source;
7605
7606         /*
7607          * Reset iterator state.
7608          */
7609         target->privateuint4 = 0;
7610         target->private5 = NULL;
7611 }
7612
7613 static unsigned int
7614 rdataset_count(dns_rdataset_t *rdataset) {
7615         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
7616         unsigned int count;
7617
7618         count = raw[0] * 256 + raw[1];
7619
7620         return (count);
7621 }
7622
7623 static isc_result_t
7624 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
7625                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7626 {
7627         dns_db_t *db = rdataset->private1;
7628         dns_dbnode_t *node = rdataset->private2;
7629         dns_dbnode_t *cloned_node;
7630         struct noqname *noqname = rdataset->private6;
7631
7632         cloned_node = NULL;
7633         attachnode(db, node, &cloned_node);
7634         nsec->methods = &rdataset_methods;
7635         nsec->rdclass = db->rdclass;
7636         nsec->type = noqname->type;
7637         nsec->covers = 0;
7638         nsec->ttl = rdataset->ttl;
7639         nsec->trust = rdataset->trust;
7640         nsec->private1 = rdataset->private1;
7641         nsec->private2 = rdataset->private2;
7642         nsec->private3 = noqname->neg;
7643         nsec->privateuint4 = 0;
7644         nsec->private5 = NULL;
7645         nsec->private6 = NULL;
7646         nsec->private7 = NULL;
7647
7648         cloned_node = NULL;
7649         attachnode(db, node, &cloned_node);
7650         nsecsig->methods = &rdataset_methods;
7651         nsecsig->rdclass = db->rdclass;
7652         nsecsig->type = dns_rdatatype_rrsig;
7653         nsecsig->covers = noqname->type;
7654         nsecsig->ttl = rdataset->ttl;
7655         nsecsig->trust = rdataset->trust;
7656         nsecsig->private1 = rdataset->private1;
7657         nsecsig->private2 = rdataset->private2;
7658         nsecsig->private3 = noqname->negsig;
7659         nsecsig->privateuint4 = 0;
7660         nsecsig->private5 = NULL;
7661         nsec->private6 = NULL;
7662         nsec->private7 = NULL;
7663
7664         dns_name_clone(&noqname->name, name);
7665
7666         return (ISC_R_SUCCESS);
7667 }
7668
7669 static isc_result_t
7670 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
7671                     dns_rdataset_t *nsec, dns_rdataset_t *nsecsig)
7672 {
7673         dns_db_t *db = rdataset->private1;
7674         dns_dbnode_t *node = rdataset->private2;
7675         dns_dbnode_t *cloned_node;
7676         struct noqname *closest = rdataset->private7;
7677
7678         cloned_node = NULL;
7679         attachnode(db, node, &cloned_node);
7680         nsec->methods = &rdataset_methods;
7681         nsec->rdclass = db->rdclass;
7682         nsec->type = closest->type;
7683         nsec->covers = 0;
7684         nsec->ttl = rdataset->ttl;
7685         nsec->trust = rdataset->trust;
7686         nsec->private1 = rdataset->private1;
7687         nsec->private2 = rdataset->private2;
7688         nsec->private3 = closest->neg;
7689         nsec->privateuint4 = 0;
7690         nsec->private5 = NULL;
7691         nsec->private6 = NULL;
7692         nsec->private7 = NULL;
7693
7694         cloned_node = NULL;
7695         attachnode(db, node, &cloned_node);
7696         nsecsig->methods = &rdataset_methods;
7697         nsecsig->rdclass = db->rdclass;
7698         nsecsig->type = dns_rdatatype_rrsig;
7699         nsecsig->covers = closest->type;
7700         nsecsig->ttl = rdataset->ttl;
7701         nsecsig->trust = rdataset->trust;
7702         nsecsig->private1 = rdataset->private1;
7703         nsecsig->private2 = rdataset->private2;
7704         nsecsig->private3 = closest->negsig;
7705         nsecsig->privateuint4 = 0;
7706         nsecsig->private5 = NULL;
7707         nsec->private6 = NULL;
7708         nsec->private7 = NULL;
7709
7710         dns_name_clone(&closest->name, name);
7711
7712         return (ISC_R_SUCCESS);
7713 }
7714
7715 static void
7716 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust) {
7717         dns_rbtdb_t *rbtdb = rdataset->private1;
7718         dns_rbtnode_t *rbtnode = rdataset->private2;
7719         rdatasetheader_t *header = rdataset->private3;
7720
7721         header--;
7722         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7723                   isc_rwlocktype_write);
7724         header->trust = rdataset->trust = trust;
7725         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7726                   isc_rwlocktype_write);
7727 }
7728
7729 static void
7730 rdataset_expire(dns_rdataset_t *rdataset) {
7731         dns_rbtdb_t *rbtdb = rdataset->private1;
7732         dns_rbtnode_t *rbtnode = rdataset->private2;
7733         rdatasetheader_t *header = rdataset->private3;
7734
7735         header--;
7736         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7737                   isc_rwlocktype_write);
7738         expire_header(rbtdb, header, ISC_FALSE);
7739         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7740                   isc_rwlocktype_write);
7741 }
7742
7743 /*
7744  * Rdataset Iterator Methods
7745  */
7746
7747 static void
7748 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp) {
7749         rbtdb_rdatasetiter_t *rbtiterator;
7750
7751         rbtiterator = (rbtdb_rdatasetiter_t *)(*iteratorp);
7752
7753         if (rbtiterator->common.version != NULL)
7754                 closeversion(rbtiterator->common.db,
7755                              &rbtiterator->common.version, ISC_FALSE);
7756         detachnode(rbtiterator->common.db, &rbtiterator->common.node);
7757         isc_mem_put(rbtiterator->common.db->mctx, rbtiterator,
7758                     sizeof(*rbtiterator));
7759
7760         *iteratorp = NULL;
7761 }
7762
7763 static isc_result_t
7764 rdatasetiter_first(dns_rdatasetiter_t *iterator) {
7765         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7766         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7767         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7768         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7769         rdatasetheader_t *header, *top_next;
7770         rbtdb_serial_t serial;
7771         isc_stdtime_t now;
7772
7773         if (IS_CACHE(rbtdb)) {
7774                 serial = 1;
7775                 now = rbtiterator->common.now;
7776         } else {
7777                 serial = rbtversion->serial;
7778                 now = 0;
7779         }
7780
7781         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7782                   isc_rwlocktype_read);
7783
7784         for (header = rbtnode->data; header != NULL; header = top_next) {
7785                 top_next = header->next;
7786                 do {
7787                         if (header->serial <= serial && !IGNORE(header)) {
7788                                 /*
7789                                  * Is this a "this rdataset doesn't exist"
7790                                  * record?  Or is it too old in the cache?
7791                                  *
7792                                  * Note: unlike everywhere else, we
7793                                  * check for now > header->rdh_ttl instead
7794                                  * of now >= header->rdh_ttl.  This allows
7795                                  * ANY and RRSIG queries for 0 TTL
7796                                  * rdatasets to work.
7797                                  */
7798                                 if (NONEXISTENT(header) ||
7799                                     (now != 0 && now > header->rdh_ttl))
7800                                         header = NULL;
7801                                 break;
7802                         } else
7803                                 header = header->down;
7804                 } while (header != NULL);
7805                 if (header != NULL)
7806                         break;
7807         }
7808
7809         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7810                     isc_rwlocktype_read);
7811
7812         rbtiterator->current = header;
7813
7814         if (header == NULL)
7815                 return (ISC_R_NOMORE);
7816
7817         return (ISC_R_SUCCESS);
7818 }
7819
7820 static isc_result_t
7821 rdatasetiter_next(dns_rdatasetiter_t *iterator) {
7822         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7823         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7824         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7825         rbtdb_version_t *rbtversion = rbtiterator->common.version;
7826         rdatasetheader_t *header, *top_next;
7827         rbtdb_serial_t serial;
7828         isc_stdtime_t now;
7829         rbtdb_rdatatype_t type, negtype;
7830         dns_rdatatype_t rdtype, covers;
7831
7832         header = rbtiterator->current;
7833         if (header == NULL)
7834                 return (ISC_R_NOMORE);
7835
7836         if (IS_CACHE(rbtdb)) {
7837                 serial = 1;
7838                 now = rbtiterator->common.now;
7839         } else {
7840                 serial = rbtversion->serial;
7841                 now = 0;
7842         }
7843
7844         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7845                   isc_rwlocktype_read);
7846
7847         type = header->type;
7848         rdtype = RBTDB_RDATATYPE_BASE(header->type);
7849         if (rdtype == 0) {
7850                 covers = RBTDB_RDATATYPE_EXT(header->type);
7851                 negtype = RBTDB_RDATATYPE_VALUE(covers, 0);
7852         } else
7853                 negtype = RBTDB_RDATATYPE_VALUE(0, rdtype);
7854         for (header = header->next; header != NULL; header = top_next) {
7855                 top_next = header->next;
7856                 /*
7857                  * If not walking back up the down list.
7858                  */
7859                 if (header->type != type && header->type != negtype) {
7860                         do {
7861                                 if (header->serial <= serial &&
7862                                     !IGNORE(header)) {
7863                                         /*
7864                                          * Is this a "this rdataset doesn't
7865                                          * exist" record?
7866                                          *
7867                                          * Note: unlike everywhere else, we
7868                                          * check for now > header->ttl instead
7869                                          * of now >= header->ttl.  This allows
7870                                          * ANY and RRSIG queries for 0 TTL
7871                                          * rdatasets to work.
7872                                          */
7873                                         if ((header->attributes &
7874                                              RDATASET_ATTR_NONEXISTENT) != 0 ||
7875                                             (now != 0 && now > header->rdh_ttl))
7876                                                 header = NULL;
7877                                         break;
7878                                 } else
7879                                         header = header->down;
7880                         } while (header != NULL);
7881                         if (header != NULL)
7882                                 break;
7883                 }
7884         }
7885
7886         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7887                     isc_rwlocktype_read);
7888
7889         rbtiterator->current = header;
7890
7891         if (header == NULL)
7892                 return (ISC_R_NOMORE);
7893
7894         return (ISC_R_SUCCESS);
7895 }
7896
7897 static void
7898 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset) {
7899         rbtdb_rdatasetiter_t *rbtiterator = (rbtdb_rdatasetiter_t *)iterator;
7900         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(rbtiterator->common.db);
7901         dns_rbtnode_t *rbtnode = rbtiterator->common.node;
7902         rdatasetheader_t *header;
7903
7904         header = rbtiterator->current;
7905         REQUIRE(header != NULL);
7906
7907         NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7908                   isc_rwlocktype_read);
7909
7910         bind_rdataset(rbtdb, rbtnode, header, rbtiterator->common.now,
7911                       rdataset);
7912
7913         NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
7914                     isc_rwlocktype_read);
7915 }
7916
7917
7918 /*
7919  * Database Iterator Methods
7920  */
7921
7922 static inline void
7923 reference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7924         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7925         dns_rbtnode_t *node = rbtdbiter->node;
7926
7927         if (node == NULL)
7928                 return;
7929
7930         INSIST(rbtdbiter->tree_locked != isc_rwlocktype_none);
7931         reactivate_node(rbtdb, node, rbtdbiter->tree_locked);
7932 }
7933
7934 static inline void
7935 dereference_iter_node(rbtdb_dbiterator_t *rbtdbiter) {
7936         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7937         dns_rbtnode_t *node = rbtdbiter->node;
7938         nodelock_t *lock;
7939
7940         if (node == NULL)
7941                 return;
7942
7943         lock = &rbtdb->node_locks[node->locknum].lock;
7944         NODE_LOCK(lock, isc_rwlocktype_read);
7945         decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
7946                             rbtdbiter->tree_locked, ISC_FALSE);
7947         NODE_UNLOCK(lock, isc_rwlocktype_read);
7948
7949         rbtdbiter->node = NULL;
7950 }
7951
7952 static void
7953 flush_deletions(rbtdb_dbiterator_t *rbtdbiter) {
7954         dns_rbtnode_t *node;
7955         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
7956         isc_boolean_t was_read_locked = ISC_FALSE;
7957         nodelock_t *lock;
7958         int i;
7959
7960         if (rbtdbiter->delete != 0) {
7961                 /*
7962                  * Note that "%d node of %d in tree" can report things like
7963                  * "flush_deletions: 59 nodes of 41 in tree".  This means
7964                  * That some nodes appear on the deletions list more than
7965                  * once.  Only the last occurence will actually be deleted.
7966                  */
7967                 isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
7968                               DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
7969                               "flush_deletions: %d nodes of %d in tree",
7970                               rbtdbiter->delete,
7971                               dns_rbt_nodecount(rbtdb->tree));
7972
7973                 if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
7974                         RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7975                         was_read_locked = ISC_TRUE;
7976                 }
7977                 RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7978                 rbtdbiter->tree_locked = isc_rwlocktype_write;
7979
7980                 for (i = 0; i < rbtdbiter->delete; i++) {
7981                         node = rbtdbiter->deletions[i];
7982                         lock = &rbtdb->node_locks[node->locknum].lock;
7983
7984                         NODE_LOCK(lock, isc_rwlocktype_read);
7985                         decrement_reference(rbtdb, node, 0,
7986                                             isc_rwlocktype_read,
7987                                             rbtdbiter->tree_locked, ISC_FALSE);
7988                         NODE_UNLOCK(lock, isc_rwlocktype_read);
7989                 }
7990
7991                 rbtdbiter->delete = 0;
7992
7993                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
7994                 if (was_read_locked) {
7995                         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
7996                         rbtdbiter->tree_locked = isc_rwlocktype_read;
7997
7998                 } else {
7999                         rbtdbiter->tree_locked = isc_rwlocktype_none;
8000                 }
8001         }
8002 }
8003
8004 static inline void
8005 resume_iteration(rbtdb_dbiterator_t *rbtdbiter) {
8006         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8007
8008         REQUIRE(rbtdbiter->paused);
8009         REQUIRE(rbtdbiter->tree_locked == isc_rwlocktype_none);
8010
8011         RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8012         rbtdbiter->tree_locked = isc_rwlocktype_read;
8013
8014         rbtdbiter->paused = ISC_FALSE;
8015 }
8016
8017 static void
8018 dbiterator_destroy(dns_dbiterator_t **iteratorp) {
8019         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)(*iteratorp);
8020         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)rbtdbiter->common.db;
8021         dns_db_t *db = NULL;
8022
8023         if (rbtdbiter->tree_locked == isc_rwlocktype_read) {
8024                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8025                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8026         } else
8027                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_none);
8028
8029         dereference_iter_node(rbtdbiter);
8030
8031         flush_deletions(rbtdbiter);
8032
8033         dns_db_attach(rbtdbiter->common.db, &db);
8034         dns_db_detach(&rbtdbiter->common.db);
8035
8036         dns_rbtnodechain_reset(&rbtdbiter->chain);
8037         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8038         isc_mem_put(db->mctx, rbtdbiter, sizeof(*rbtdbiter));
8039         dns_db_detach(&db);
8040
8041         *iteratorp = NULL;
8042 }
8043
8044 static isc_result_t
8045 dbiterator_first(dns_dbiterator_t *iterator) {
8046         isc_result_t result;
8047         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8048         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8049         dns_name_t *name, *origin;
8050
8051         if (rbtdbiter->result != ISC_R_SUCCESS &&
8052             rbtdbiter->result != ISC_R_NOMORE)
8053                 return (rbtdbiter->result);
8054
8055         if (rbtdbiter->paused)
8056                 resume_iteration(rbtdbiter);
8057
8058         dereference_iter_node(rbtdbiter);
8059
8060         name = dns_fixedname_name(&rbtdbiter->name);
8061         origin = dns_fixedname_name(&rbtdbiter->origin);
8062         dns_rbtnodechain_reset(&rbtdbiter->chain);
8063         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8064
8065         if (rbtdbiter->nsec3only) {
8066                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8067                 result = dns_rbtnodechain_first(rbtdbiter->current,
8068                                                 rbtdb->nsec3, name, origin);
8069         } else {
8070                 rbtdbiter->current = &rbtdbiter->chain;
8071                 result = dns_rbtnodechain_first(rbtdbiter->current,
8072                                                 rbtdb->tree, name, origin);
8073                 if (!rbtdbiter->nonsec3 && result == ISC_R_NOTFOUND) {
8074                         rbtdbiter->current = &rbtdbiter->nsec3chain;
8075                         result = dns_rbtnodechain_first(rbtdbiter->current,
8076                                                         rbtdb->nsec3, name,
8077                                                         origin);
8078                 }
8079         }
8080         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8081                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8082                                                   NULL, &rbtdbiter->node);
8083                 if (result == ISC_R_SUCCESS) {
8084                         rbtdbiter->new_origin = ISC_TRUE;
8085                         reference_iter_node(rbtdbiter);
8086                 }
8087         } else {
8088                 INSIST(result == ISC_R_NOTFOUND);
8089                 result = ISC_R_NOMORE; /* The tree is empty. */
8090         }
8091
8092         rbtdbiter->result = result;
8093
8094         return (result);
8095 }
8096
8097 static isc_result_t
8098 dbiterator_last(dns_dbiterator_t *iterator) {
8099         isc_result_t result;
8100         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8101         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8102         dns_name_t *name, *origin;
8103
8104         if (rbtdbiter->result != ISC_R_SUCCESS &&
8105             rbtdbiter->result != ISC_R_NOMORE)
8106                 return (rbtdbiter->result);
8107
8108         if (rbtdbiter->paused)
8109                 resume_iteration(rbtdbiter);
8110
8111         dereference_iter_node(rbtdbiter);
8112
8113         name = dns_fixedname_name(&rbtdbiter->name);
8114         origin = dns_fixedname_name(&rbtdbiter->origin);
8115         dns_rbtnodechain_reset(&rbtdbiter->chain);
8116         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8117
8118         result = ISC_R_NOTFOUND;
8119         if (rbtdbiter->nsec3only && !rbtdbiter->nonsec3) {
8120                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8121                 result = dns_rbtnodechain_last(rbtdbiter->current,
8122                                                rbtdb->nsec3, name, origin);
8123         }
8124         if (!rbtdbiter->nsec3only && result == ISC_R_NOTFOUND) {
8125                 rbtdbiter->current = &rbtdbiter->chain;
8126                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8127                                                name, origin);
8128         }
8129         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
8130                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8131                                                   NULL, &rbtdbiter->node);
8132                 if (result == ISC_R_SUCCESS) {
8133                         rbtdbiter->new_origin = ISC_TRUE;
8134                         reference_iter_node(rbtdbiter);
8135                 }
8136         } else {
8137                 INSIST(result == ISC_R_NOTFOUND);
8138                 result = ISC_R_NOMORE; /* The tree is empty. */
8139         }
8140
8141         rbtdbiter->result = result;
8142
8143         return (result);
8144 }
8145
8146 static isc_result_t
8147 dbiterator_seek(dns_dbiterator_t *iterator, dns_name_t *name) {
8148         isc_result_t result;
8149         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8150         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8151         dns_name_t *iname, *origin;
8152
8153         if (rbtdbiter->result != ISC_R_SUCCESS &&
8154             rbtdbiter->result != ISC_R_NOTFOUND &&
8155             rbtdbiter->result != ISC_R_NOMORE)
8156                 return (rbtdbiter->result);
8157
8158         if (rbtdbiter->paused)
8159                 resume_iteration(rbtdbiter);
8160
8161         dereference_iter_node(rbtdbiter);
8162
8163         iname = dns_fixedname_name(&rbtdbiter->name);
8164         origin = dns_fixedname_name(&rbtdbiter->origin);
8165         dns_rbtnodechain_reset(&rbtdbiter->chain);
8166         dns_rbtnodechain_reset(&rbtdbiter->nsec3chain);
8167
8168         if (rbtdbiter->nsec3only) {
8169                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8170                 result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8171                                           &rbtdbiter->node,
8172                                           rbtdbiter->current,
8173                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8174         } else if (rbtdbiter->nonsec3) {
8175                 rbtdbiter->current = &rbtdbiter->chain;
8176                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8177                                           &rbtdbiter->node,
8178                                           rbtdbiter->current,
8179                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8180         } else {
8181                 /*
8182                  * Stay on main chain if not found on either chain.
8183                  */
8184                 rbtdbiter->current = &rbtdbiter->chain;
8185                 result = dns_rbt_findnode(rbtdb->tree, name, NULL,
8186                                           &rbtdbiter->node,
8187                                           rbtdbiter->current,
8188                                           DNS_RBTFIND_EMPTYDATA, NULL, NULL);
8189                 if (result == DNS_R_PARTIALMATCH) {
8190                         dns_rbtnode_t *node = NULL;
8191                         result = dns_rbt_findnode(rbtdb->nsec3, name, NULL,
8192                                                   &node, &rbtdbiter->nsec3chain,
8193                                                   DNS_RBTFIND_EMPTYDATA,
8194                                                   NULL, NULL);
8195                         if (result == ISC_R_SUCCESS) {
8196                                 rbtdbiter->node = node;
8197                                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8198                         }
8199                 }
8200         }
8201
8202 #if 1
8203         if (result == ISC_R_SUCCESS) {
8204                 result = dns_rbtnodechain_current(rbtdbiter->current, iname,
8205                                                   origin, NULL);
8206                 if (result == ISC_R_SUCCESS) {
8207                         rbtdbiter->new_origin = ISC_TRUE;
8208                         reference_iter_node(rbtdbiter);
8209                 }
8210         } else if (result == DNS_R_PARTIALMATCH) {
8211                 result = ISC_R_NOTFOUND;
8212                 rbtdbiter->node = NULL;
8213         }
8214
8215         rbtdbiter->result = result;
8216 #else
8217         if (result == ISC_R_SUCCESS || result == DNS_R_PARTIALMATCH) {
8218                 isc_result_t tresult;
8219                 tresult = dns_rbtnodechain_current(rbtdbiter->current, iname,
8220                                                    origin, NULL);
8221                 if (tresult == ISC_R_SUCCESS) {
8222                         rbtdbiter->new_origin = ISC_TRUE;
8223                         reference_iter_node(rbtdbiter);
8224                 } else {
8225                         result = tresult;
8226                         rbtdbiter->node = NULL;
8227                 }
8228         } else
8229                 rbtdbiter->node = NULL;
8230
8231         rbtdbiter->result = (result == DNS_R_PARTIALMATCH) ?
8232                             ISC_R_SUCCESS : result;
8233 #endif
8234
8235         return (result);
8236 }
8237
8238 static isc_result_t
8239 dbiterator_prev(dns_dbiterator_t *iterator) {
8240         isc_result_t result;
8241         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8242         dns_name_t *name, *origin;
8243         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8244
8245         REQUIRE(rbtdbiter->node != NULL);
8246
8247         if (rbtdbiter->result != ISC_R_SUCCESS)
8248                 return (rbtdbiter->result);
8249
8250         if (rbtdbiter->paused)
8251                 resume_iteration(rbtdbiter);
8252
8253         name = dns_fixedname_name(&rbtdbiter->name);
8254         origin = dns_fixedname_name(&rbtdbiter->origin);
8255         result = dns_rbtnodechain_prev(rbtdbiter->current, name, origin);
8256         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8257             !rbtdbiter->nonsec3 &&
8258             &rbtdbiter->nsec3chain == rbtdbiter->current) {
8259                 rbtdbiter->current = &rbtdbiter->chain;
8260                 dns_rbtnodechain_reset(rbtdbiter->current);
8261                 result = dns_rbtnodechain_last(rbtdbiter->current, rbtdb->tree,
8262                                                name, origin);
8263                 if (result == ISC_R_NOTFOUND)
8264                         result = ISC_R_NOMORE;
8265         }
8266
8267         dereference_iter_node(rbtdbiter);
8268
8269         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8270                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8271                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8272                                                   NULL, &rbtdbiter->node);
8273         }
8274
8275         if (result == ISC_R_SUCCESS)
8276                 reference_iter_node(rbtdbiter);
8277
8278         rbtdbiter->result = result;
8279
8280         return (result);
8281 }
8282
8283 static isc_result_t
8284 dbiterator_next(dns_dbiterator_t *iterator) {
8285         isc_result_t result;
8286         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8287         dns_name_t *name, *origin;
8288         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8289
8290         REQUIRE(rbtdbiter->node != NULL);
8291
8292         if (rbtdbiter->result != ISC_R_SUCCESS)
8293                 return (rbtdbiter->result);
8294
8295         if (rbtdbiter->paused)
8296                 resume_iteration(rbtdbiter);
8297
8298         name = dns_fixedname_name(&rbtdbiter->name);
8299         origin = dns_fixedname_name(&rbtdbiter->origin);
8300         result = dns_rbtnodechain_next(rbtdbiter->current, name, origin);
8301         if (result == ISC_R_NOMORE && !rbtdbiter->nsec3only &&
8302             !rbtdbiter->nonsec3 && &rbtdbiter->chain == rbtdbiter->current) {
8303                 rbtdbiter->current = &rbtdbiter->nsec3chain;
8304                 dns_rbtnodechain_reset(rbtdbiter->current);
8305                 result = dns_rbtnodechain_first(rbtdbiter->current,
8306                                                 rbtdb->nsec3, name, origin);
8307                 if (result == ISC_R_NOTFOUND)
8308                         result = ISC_R_NOMORE;
8309         }
8310
8311         dereference_iter_node(rbtdbiter);
8312
8313         if (result == DNS_R_NEWORIGIN || result == ISC_R_SUCCESS) {
8314                 rbtdbiter->new_origin = ISC_TF(result == DNS_R_NEWORIGIN);
8315                 result = dns_rbtnodechain_current(rbtdbiter->current, NULL,
8316                                                   NULL, &rbtdbiter->node);
8317         }
8318         if (result == ISC_R_SUCCESS)
8319                 reference_iter_node(rbtdbiter);
8320
8321         rbtdbiter->result = result;
8322
8323         return (result);
8324 }
8325
8326 static isc_result_t
8327 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
8328                    dns_name_t *name)
8329 {
8330         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8331         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8332         dns_rbtnode_t *node = rbtdbiter->node;
8333         isc_result_t result;
8334         dns_name_t *nodename = dns_fixedname_name(&rbtdbiter->name);
8335         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8336
8337         REQUIRE(rbtdbiter->result == ISC_R_SUCCESS);
8338         REQUIRE(rbtdbiter->node != NULL);
8339
8340         if (rbtdbiter->paused)
8341                 resume_iteration(rbtdbiter);
8342
8343         if (name != NULL) {
8344                 if (rbtdbiter->common.relative_names)
8345                         origin = NULL;
8346                 result = dns_name_concatenate(nodename, origin, name, NULL);
8347                 if (result != ISC_R_SUCCESS)
8348                         return (result);
8349                 if (rbtdbiter->common.relative_names && rbtdbiter->new_origin)
8350                         result = DNS_R_NEWORIGIN;
8351         } else
8352                 result = ISC_R_SUCCESS;
8353
8354         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8355         new_reference(rbtdb, node);
8356         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8357
8358         *nodep = rbtdbiter->node;
8359
8360         if (iterator->cleaning && result == ISC_R_SUCCESS) {
8361                 isc_result_t expire_result;
8362
8363                 /*
8364                  * If the deletion array is full, flush it before trying
8365                  * to expire the current node.  The current node can't
8366                  * fully deleted while the iteration cursor is still on it.
8367                  */
8368                 if (rbtdbiter->delete == DELETION_BATCH_MAX)
8369                         flush_deletions(rbtdbiter);
8370
8371                 expire_result = expirenode(iterator->db, *nodep, 0);
8372
8373                 /*
8374                  * expirenode() currently always returns success.
8375                  */
8376                 if (expire_result == ISC_R_SUCCESS && node->down == NULL) {
8377                         unsigned int refs;
8378
8379                         rbtdbiter->deletions[rbtdbiter->delete++] = node;
8380                         NODE_STRONGLOCK(&rbtdb->node_locks[node->locknum].lock);
8381                         dns_rbtnode_refincrement(node, &refs);
8382                         INSIST(refs != 0);
8383                         NODE_STRONGUNLOCK(&rbtdb->node_locks[node->locknum].lock);
8384                 }
8385         }
8386
8387         return (result);
8388 }
8389
8390 static isc_result_t
8391 dbiterator_pause(dns_dbiterator_t *iterator) {
8392         dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)iterator->db;
8393         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8394
8395         if (rbtdbiter->result != ISC_R_SUCCESS &&
8396             rbtdbiter->result != ISC_R_NOMORE)
8397                 return (rbtdbiter->result);
8398
8399         if (rbtdbiter->paused)
8400                 return (ISC_R_SUCCESS);
8401
8402         rbtdbiter->paused = ISC_TRUE;
8403
8404         if (rbtdbiter->tree_locked != isc_rwlocktype_none) {
8405                 INSIST(rbtdbiter->tree_locked == isc_rwlocktype_read);
8406                 RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
8407                 rbtdbiter->tree_locked = isc_rwlocktype_none;
8408         }
8409
8410         flush_deletions(rbtdbiter);
8411
8412         return (ISC_R_SUCCESS);
8413 }
8414
8415 static isc_result_t
8416 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name) {
8417         rbtdb_dbiterator_t *rbtdbiter = (rbtdb_dbiterator_t *)iterator;
8418         dns_name_t *origin = dns_fixedname_name(&rbtdbiter->origin);
8419
8420         if (rbtdbiter->result != ISC_R_SUCCESS)
8421                 return (rbtdbiter->result);
8422
8423         return (dns_name_copy(origin, name, NULL));
8424 }
8425
8426 /*%
8427  * Additional cache routines.
8428  */
8429 static isc_result_t
8430 rdataset_getadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8431                        dns_rdatatype_t qtype, dns_acache_t *acache,
8432                        dns_zone_t **zonep, dns_db_t **dbp,
8433                        dns_dbversion_t **versionp, dns_dbnode_t **nodep,
8434                        dns_name_t *fname, dns_message_t *msg,
8435                        isc_stdtime_t now)
8436 {
8437 #ifndef BIND9
8438         UNUSED(rdataset);
8439         UNUSED(type);
8440         UNUSED(qtype);
8441         UNUSED(acache);
8442         UNUSED(zonep);
8443         UNUSED(dbp);
8444         UNUSED(versionp);
8445         UNUSED(nodep);
8446         UNUSED(fname);
8447         UNUSED(msg);
8448         UNUSED(now);
8449
8450         return (ISC_R_NOTIMPLEMENTED);
8451 #else
8452         dns_rbtdb_t *rbtdb = rdataset->private1;
8453         dns_rbtnode_t *rbtnode = rdataset->private2;
8454         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8455         unsigned int current_count = rdataset->privateuint4;
8456         unsigned int count;
8457         rdatasetheader_t *header;
8458         nodelock_t *nodelock;
8459         unsigned int total_count;
8460         acachectl_t *acarray;
8461         dns_acacheentry_t *entry;
8462         isc_result_t result;
8463
8464         UNUSED(qtype); /* we do not use this value at least for now */
8465         UNUSED(acache);
8466
8467         header = (struct rdatasetheader *)(raw - sizeof(*header));
8468
8469         total_count = raw[0] * 256 + raw[1];
8470         INSIST(total_count > current_count);
8471         count = total_count - current_count - 1;
8472
8473         acarray = NULL;
8474
8475         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8476         NODE_LOCK(nodelock, isc_rwlocktype_read);
8477
8478         switch (type) {
8479         case dns_rdatasetadditional_fromauth:
8480                 acarray = header->additional_auth;
8481                 break;
8482         case dns_rdatasetadditional_fromcache:
8483                 acarray = NULL;
8484                 break;
8485         case dns_rdatasetadditional_fromglue:
8486                 acarray = header->additional_glue;
8487                 break;
8488         default:
8489                 INSIST(0);
8490         }
8491
8492         if (acarray == NULL) {
8493                 if (type != dns_rdatasetadditional_fromcache)
8494                         dns_acache_countquerymiss(acache);
8495                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8496                 return (ISC_R_NOTFOUND);
8497         }
8498
8499         if (acarray[count].entry == NULL) {
8500                 dns_acache_countquerymiss(acache);
8501                 NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8502                 return (ISC_R_NOTFOUND);
8503         }
8504
8505         entry = NULL;
8506         dns_acache_attachentry(acarray[count].entry, &entry);
8507
8508         NODE_UNLOCK(nodelock, isc_rwlocktype_read);
8509
8510         result = dns_acache_getentry(entry, zonep, dbp, versionp,
8511                                      nodep, fname, msg, now);
8512
8513         dns_acache_detachentry(&entry);
8514
8515         return (result);
8516 }
8517
8518 static void
8519 acache_callback(dns_acacheentry_t *entry, void **arg) {
8520         dns_rbtdb_t *rbtdb;
8521         dns_rbtnode_t *rbtnode;
8522         nodelock_t *nodelock;
8523         acachectl_t *acarray = NULL;
8524         acache_cbarg_t *cbarg;
8525         unsigned int count;
8526
8527         REQUIRE(arg != NULL);
8528         cbarg = *arg;
8529
8530         /*
8531          * The caller must hold the entry lock.
8532          */
8533
8534         rbtdb = (dns_rbtdb_t *)cbarg->db;
8535         rbtnode = (dns_rbtnode_t *)cbarg->node;
8536
8537         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8538         NODE_LOCK(nodelock, isc_rwlocktype_write);
8539
8540         switch (cbarg->type) {
8541         case dns_rdatasetadditional_fromauth:
8542                 acarray = cbarg->header->additional_auth;
8543                 break;
8544         case dns_rdatasetadditional_fromglue:
8545                 acarray = cbarg->header->additional_glue;
8546                 break;
8547         default:
8548                 INSIST(0);
8549         }
8550
8551         count = cbarg->count;
8552         if (acarray != NULL && acarray[count].entry == entry) {
8553                 acarray[count].entry = NULL;
8554                 INSIST(acarray[count].cbarg == cbarg);
8555                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8556                 acarray[count].cbarg = NULL;
8557         } else
8558                 isc_mem_put(rbtdb->common.mctx, cbarg, sizeof(acache_cbarg_t));
8559
8560         dns_acache_detachentry(&entry);
8561
8562         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8563
8564         dns_db_detachnode((dns_db_t *)rbtdb, (dns_dbnode_t **)(void*)&rbtnode);
8565         dns_db_detach((dns_db_t **)(void*)&rbtdb);
8566
8567         *arg = NULL;
8568 #endif /* BIND9 */
8569 }
8570
#ifdef BIND9
/*
 * Cancel an acache entry and dispose of its callback argument: the
 * node and database references held by '*cbargp' are released, the
 * argument is returned to 'mctx', and '*cbargp' is set to NULL.
 */
static void
acache_cancelentry(isc_mem_t *mctx, dns_acacheentry_t *entry,
                   acache_cbarg_t **cbargp)
{
        acache_cbarg_t *cbarg;

        REQUIRE(mctx != NULL);
        REQUIRE(entry != NULL);
        REQUIRE(cbargp != NULL && *cbargp != NULL);

        cbarg = *cbargp;

        dns_acache_cancelentry(entry);

        /*
         * Release what the callback argument was holding, then the
         * argument itself.
         */
        dns_db_detachnode(cbarg->db, &cbarg->node);
        dns_db_detach(&cbarg->db);
        isc_mem_put(mctx, cbarg, sizeof(acache_cbarg_t));

        *cbargp = NULL;
}
#endif /* BIND9 */
8593
8594 static isc_result_t
8595 rdataset_setadditional(dns_rdataset_t *rdataset, dns_rdatasetadditional_t type,
8596                        dns_rdatatype_t qtype, dns_acache_t *acache,
8597                        dns_zone_t *zone, dns_db_t *db,
8598                        dns_dbversion_t *version, dns_dbnode_t *node,
8599                        dns_name_t *fname)
8600 {
8601 #ifndef BIND9
8602         UNUSED(rdataset);
8603         UNUSED(type);
8604         UNUSED(qtype);
8605         UNUSED(acache);
8606         UNUSED(zone);
8607         UNUSED(db);
8608         UNUSED(version);
8609         UNUSED(node);
8610         UNUSED(fname);
8611
8612         return (ISC_R_NOTIMPLEMENTED);
8613 #else
8614         dns_rbtdb_t *rbtdb = rdataset->private1;
8615         dns_rbtnode_t *rbtnode = rdataset->private2;
8616         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8617         unsigned int current_count = rdataset->privateuint4;
8618         rdatasetheader_t *header;
8619         unsigned int total_count, count;
8620         nodelock_t *nodelock;
8621         isc_result_t result;
8622         acachectl_t *acarray;
8623         dns_acacheentry_t *newentry, *oldentry = NULL;
8624         acache_cbarg_t *newcbarg, *oldcbarg = NULL;
8625
8626         UNUSED(qtype);
8627
8628         if (type == dns_rdatasetadditional_fromcache)
8629                 return (ISC_R_SUCCESS);
8630
8631         header = (struct rdatasetheader *)(raw - sizeof(*header));
8632
8633         total_count = raw[0] * 256 + raw[1];
8634         INSIST(total_count > current_count);
8635         count = total_count - current_count - 1; /* should be private data */
8636
8637         newcbarg = isc_mem_get(rbtdb->common.mctx, sizeof(*newcbarg));
8638         if (newcbarg == NULL)
8639                 return (ISC_R_NOMEMORY);
8640         newcbarg->type = type;
8641         newcbarg->count = count;
8642         newcbarg->header = header;
8643         newcbarg->db = NULL;
8644         dns_db_attach((dns_db_t *)rbtdb, &newcbarg->db);
8645         newcbarg->node = NULL;
8646         dns_db_attachnode((dns_db_t *)rbtdb, (dns_dbnode_t *)rbtnode,
8647                           &newcbarg->node);
8648         newentry = NULL;
8649         result = dns_acache_createentry(acache, (dns_db_t *)rbtdb,
8650                                         acache_callback, newcbarg, &newentry);
8651         if (result != ISC_R_SUCCESS)
8652                 goto fail;
8653         /* Set cache data in the new entry. */
8654         result = dns_acache_setentry(acache, newentry, zone, db,
8655                                      version, node, fname);
8656         if (result != ISC_R_SUCCESS)
8657                 goto fail;
8658
8659         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8660         NODE_LOCK(nodelock, isc_rwlocktype_write);
8661
8662         acarray = NULL;
8663         switch (type) {
8664         case dns_rdatasetadditional_fromauth:
8665                 acarray = header->additional_auth;
8666                 break;
8667         case dns_rdatasetadditional_fromglue:
8668                 acarray = header->additional_glue;
8669                 break;
8670         default:
8671                 INSIST(0);
8672         }
8673
8674         if (acarray == NULL) {
8675                 unsigned int i;
8676
8677                 acarray = isc_mem_get(rbtdb->common.mctx, total_count *
8678                                       sizeof(acachectl_t));
8679
8680                 if (acarray == NULL) {
8681                         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8682                         goto fail;
8683                 }
8684
8685                 for (i = 0; i < total_count; i++) {
8686                         acarray[i].entry = NULL;
8687                         acarray[i].cbarg = NULL;
8688                 }
8689         }
8690         switch (type) {
8691         case dns_rdatasetadditional_fromauth:
8692                 header->additional_auth = acarray;
8693                 break;
8694         case dns_rdatasetadditional_fromglue:
8695                 header->additional_glue = acarray;
8696                 break;
8697         default:
8698                 INSIST(0);
8699         }
8700
8701         if (acarray[count].entry != NULL) {
8702                 /*
8703                  * Swap the entry.  Delay cleaning-up the old entry since
8704                  * it would require a node lock.
8705                  */
8706                 oldentry = acarray[count].entry;
8707                 INSIST(acarray[count].cbarg != NULL);
8708                 oldcbarg = acarray[count].cbarg;
8709         }
8710         acarray[count].entry = newentry;
8711         acarray[count].cbarg = newcbarg;
8712
8713         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8714
8715         if (oldentry != NULL) {
8716                 acache_cancelentry(rbtdb->common.mctx, oldentry, &oldcbarg);
8717                 dns_acache_detachentry(&oldentry);
8718         }
8719
8720         return (ISC_R_SUCCESS);
8721
8722  fail:
8723         if (newcbarg != NULL) {
8724                 if (newentry != NULL) {
8725                         acache_cancelentry(rbtdb->common.mctx, newentry,
8726                                            &newcbarg);
8727                         dns_acache_detachentry(&newentry);
8728                 } else {
8729                         dns_db_detachnode((dns_db_t *)rbtdb, &newcbarg->node);
8730                         dns_db_detach(&newcbarg->db);
8731                         isc_mem_put(rbtdb->common.mctx, newcbarg,
8732                             sizeof(*newcbarg));
8733                 }
8734         }
8735
8736         return (result);
8737 #endif
8738 }
8739
8740 static isc_result_t
8741 rdataset_putadditional(dns_acache_t *acache, dns_rdataset_t *rdataset,
8742                        dns_rdatasetadditional_t type, dns_rdatatype_t qtype)
8743 {
8744 #ifndef BIND9
8745         UNUSED(acache);
8746         UNUSED(rdataset);
8747         UNUSED(type);
8748         UNUSED(qtype);
8749
8750         return (ISC_R_NOTIMPLEMENTED);
8751 #else
8752         dns_rbtdb_t *rbtdb = rdataset->private1;
8753         dns_rbtnode_t *rbtnode = rdataset->private2;
8754         unsigned char *raw = rdataset->private3;        /* RDATASLAB */
8755         unsigned int current_count = rdataset->privateuint4;
8756         rdatasetheader_t *header;
8757         nodelock_t *nodelock;
8758         unsigned int total_count, count;
8759         acachectl_t *acarray;
8760         dns_acacheentry_t *entry;
8761         acache_cbarg_t *cbarg;
8762
8763         UNUSED(qtype);          /* we do not use this value at least for now */
8764         UNUSED(acache);
8765
8766         if (type == dns_rdatasetadditional_fromcache)
8767                 return (ISC_R_SUCCESS);
8768
8769         header = (struct rdatasetheader *)(raw - sizeof(*header));
8770
8771         total_count = raw[0] * 256 + raw[1];
8772         INSIST(total_count > current_count);
8773         count = total_count - current_count - 1;
8774
8775         acarray = NULL;
8776         entry = NULL;
8777
8778         nodelock = &rbtdb->node_locks[rbtnode->locknum].lock;
8779         NODE_LOCK(nodelock, isc_rwlocktype_write);
8780
8781         switch (type) {
8782         case dns_rdatasetadditional_fromauth:
8783                 acarray = header->additional_auth;
8784                 break;
8785         case dns_rdatasetadditional_fromglue:
8786                 acarray = header->additional_glue;
8787                 break;
8788         default:
8789                 INSIST(0);
8790         }
8791
8792         if (acarray == NULL) {
8793                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8794                 return (ISC_R_NOTFOUND);
8795         }
8796
8797         entry = acarray[count].entry;
8798         if (entry == NULL) {
8799                 NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8800                 return (ISC_R_NOTFOUND);
8801         }
8802
8803         acarray[count].entry = NULL;
8804         cbarg = acarray[count].cbarg;
8805         acarray[count].cbarg = NULL;
8806
8807         NODE_UNLOCK(nodelock, isc_rwlocktype_write);
8808
8809         if (entry != NULL) {
8810                 if (cbarg != NULL)
8811                         acache_cancelentry(rbtdb->common.mctx, entry, &cbarg);
8812                 dns_acache_detachentry(&entry);
8813         }
8814
8815         return (ISC_R_SUCCESS);
8816 #endif
8817 }
8818
8819 /*%
8820  * Routines for LRU-based cache management.
8821  */
8822
8823 /*%
8824  * See if a given cache entry that is being reused needs to be updated
8825  * in the LRU-list.  From the LRU management point of view, this function is
8826  * expected to return true for almost all cases.  When used with threads,
8827  * however, this may cause a non-negligible performance penalty because a
8828  * writer lock will have to be acquired before updating the list.
8829  * If DNS_RBTDB_LIMITLRUUPDATE is defined to be non 0 at compilation time, this
8830  * function returns true if the entry has not been updated for some period of
8831  * time.  We differentiate the NS or glue address case and the others since
8832  * experiments have shown that the former tends to be accessed relatively
8833  * infrequently and the cost of cache miss is higher (e.g., a missing NS records
8834  * may cause external queries at a higher level zone, involving more
8835  * transactions).
8836  *
8837  * Caller must hold the node (read or write) lock.
8838  */
8839 static inline isc_boolean_t
8840 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now) {
8841         if ((header->attributes &
8842              (RDATASET_ATTR_NONEXISTENT|RDATASET_ATTR_STALE)) != 0)
8843                 return (ISC_FALSE);
8844
8845 #if DNS_RBTDB_LIMITLRUUPDATE
8846         if (header->type == dns_rdatatype_ns ||
8847             (header->trust == dns_trust_glue &&
8848              (header->type == dns_rdatatype_a ||
8849               header->type == dns_rdatatype_aaaa))) {
8850                 /*
8851                  * Glue records are updated if at least 60 seconds have passed
8852                  * since the previous update time.
8853                  */
8854                 return (header->last_used + 60 <= now);
8855         }
8856
8857         /* Other records are updated if 5 minutes have passed. */
8858         return (header->last_used + 300 <= now);
8859 #else
8860         UNUSED(now);
8861
8862         return (ISC_TRUE);
8863 #endif
8864 }
8865
8866 /*%
8867  * Update the timestamp of a given cache entry and move it to the head
8868  * of the corresponding LRU list.
8869  *
8870  * Caller must hold the node (write) lock.
8871  *
8872  * Note that the we do NOT touch the heap here, as the TTL has not changed.
8873  */
8874 static void
8875 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8876               isc_stdtime_t now)
8877 {
8878         INSIST(IS_CACHE(rbtdb));
8879
8880         /* To be checked: can we really assume this? XXXMLG */
8881         INSIST(ISC_LINK_LINKED(header, link));
8882
8883         ISC_LIST_UNLINK(rbtdb->rdatasets[header->node->locknum], header, link);
8884         header->last_used = now;
8885         ISC_LIST_PREPEND(rbtdb->rdatasets[header->node->locknum], header, link);
8886 }
8887
8888 /*%
8889  * Purge some expired and/or stale (i.e. unused for some period) cache entries
8890  * under an overmem condition.  To recover from this condition quickly, up to
8891  * 2 entries will be purged.  This process is triggered while adding a new
8892  * entry, and we specifically avoid purging entries in the same LRU bucket as
8893  * the one to which the new entry will belong.  Otherwise, we might purge
8894  * entries of the same name of different RR types while adding RRsets from a
8895  * single response (consider the case where we're adding A and AAAA glue records
8896  * of the same NS name).
8897  */
8898 static void
8899 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
8900               isc_stdtime_t now, isc_boolean_t tree_locked)
8901 {
8902         rdatasetheader_t *header, *header_prev;
8903         unsigned int locknum;
8904         int purgecount = 2;
8905
8906         for (locknum = (locknum_start + 1) % rbtdb->node_lock_count;
8907              locknum != locknum_start && purgecount > 0;
8908              locknum = (locknum + 1) % rbtdb->node_lock_count) {
8909                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
8910                           isc_rwlocktype_write);
8911
8912                 header = isc_heap_element(rbtdb->heaps[locknum], 1);
8913                 if (header && header->rdh_ttl <= now - RBTDB_VIRTUAL) {
8914                         expire_header(rbtdb, header, tree_locked);
8915                         purgecount--;
8916                 }
8917
8918                 for (header = ISC_LIST_TAIL(rbtdb->rdatasets[locknum]);
8919                      header != NULL && purgecount > 0;
8920                      header = header_prev) {
8921                         header_prev = ISC_LIST_PREV(header, link);
8922                         /*
8923                          * Unlink the entry at this point to avoid checking it
8924                          * again even if it's currently used someone else and
8925                          * cannot be purged at this moment.  This entry won't be
8926                          * referenced any more (so unlinking is safe) since the
8927                          * TTL was reset to 0.
8928                          */
8929                         ISC_LIST_UNLINK(rbtdb->rdatasets[locknum], header,
8930                                         link);
8931                         expire_header(rbtdb, header, tree_locked);
8932                         purgecount--;
8933                 }
8934
8935                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
8936                                     isc_rwlocktype_write);
8937         }
8938 }
8939
8940 static void
8941 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
8942               isc_boolean_t tree_locked)
8943 {
8944         set_ttl(rbtdb, header, 0);
8945         header->attributes |= RDATASET_ATTR_STALE;
8946         header->node->dirty = 1;
8947
8948         /*
8949          * Caller must hold the node (write) lock.
8950          */
8951
8952         if (dns_rbtnode_refcurrent(header->node) == 0) {
8953                 /*
8954                  * If no one else is using the node, we can clean it up now.
8955                  * We first need to gain a new reference to the node to meet a
8956                  * requirement of decrement_reference().
8957                  */
8958                 new_reference(rbtdb, header->node);
8959                 decrement_reference(rbtdb, header->node, 0,
8960                                     isc_rwlocktype_write,
8961                                     tree_locked ? isc_rwlocktype_write :
8962                                     isc_rwlocktype_none, ISC_FALSE);
8963         }
8964 }