Tweak the hashtable routines to be a little clearer and easier.
author Wayne Davison <wayne@opencoder.net>
Fri, 12 Jun 2020 23:39:29 +0000 (16:39 -0700)
committer Wayne Davison <wayne@opencoder.net>
Sat, 13 Jun 2020 00:42:41 +0000 (17:42 -0700)
flist.c
hashtable.c
hlink.c
rsync.h
xattrs.c

diff --git a/flist.c b/flist.c
index dbb0f92182a5f64e9df3628a32a3a5e251128ad8..bbc028bae952a63c23f62178c767e155ade1d390 100644 (file)
--- a/flist.c
+++ b/flist.c
@@ -491,9 +491,9 @@ static void send_file_entry(int f, const char *fname, struct file_struct *file,
        if (tmp_dev != -1) {
                if (protocol_version >= 30) {
                        struct ht_int64_node *np = idev_find(tmp_dev, tmp_ino);
-                       first_hlink_ndx = (int32)(long)np->data - 1;
+                       first_hlink_ndx = (int32)(long)np->data; /* is -1 when new */
                        if (first_hlink_ndx < 0) {
-                               np->data = (void*)(long)(first_ndx + ndx + 1);
+                               np->data = (void*)(long)(first_ndx + ndx);
                                xflags |= XMIT_HLINK_FIRST;
                        }
                        if (DEBUG_GTE(HLINK, 1)) {
@@ -1101,10 +1101,10 @@ static struct file_struct *recv_file_entry(int f, struct file_list *flist, int x
                                ino = read_longint(f);
                        }
                        np = idev_find(dev, ino);
-                       ndx = (int32)(long)np->data - 1;
+                       ndx = (int32)(long)np->data; /* is -1 when new */
                        if (ndx < 0) {
-                               ndx = cnt++;
                                np->data = (void*)(long)cnt;
+                               ndx = cnt++;
                        }
                        F_HL_GNUM(file) = ndx;
                }
diff --git a/hashtable.c b/hashtable.c
index 00c057db52774075220621cd336b383866044113..52f0fa07c3664114e73a73c6cd68d9d3d20dd8ed 100644 (file)
--- a/hashtable.c
+++ b/hashtable.c
@@ -66,9 +66,19 @@ void hashtable_destroy(struct hashtable *tbl)
        free(tbl);
 }
 
-/* This returns the node for the indicated key, either newly created or
- * already existing.  Returns NULL if not allocating and not found. */
-void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing)
+/* Returns the node that holds the indicated key if it exists. When it does not
+ * exist, it returns either NULL (when data_when_new is NULL), or it returns a
+ * new node with its node->data set to the indicated value.
+ *
+ * If your code doesn't know the data value for a new node in advance (usually
+ * because it doesn't know if a node is new or not) you should pass in a unique
+ * (non-0) value that you can use to check if the returned node is new. You can
+ * then overwrite the data with any value you want (even 0) since it only needs
+ * to be different than whatever data_when_new value you use later on.
+ *
+ * This return is a void* just because it might be pointing at a ht_int32_node
+ * or a ht_int64_node, and that makes the caller's assignment a little easier. */
+void *hashtable_find(struct hashtable *tbl, int64 key, void *data_when_new)
 {
        int key64 = tbl->key64;
        struct ht_int32_node *node;
@@ -79,7 +89,7 @@ void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing)
                exit_cleanup(RERR_MESSAGEIO);
        }
 
-       if (allocate_if_missing && tbl->entries > HASH_LOAD_LIMIT(tbl->size)) {
+       if (data_when_new && tbl->entries > HASH_LOAD_LIMIT(tbl->size)) {
                void *old_nodes = tbl->nodes;
                int size = tbl->size * 2;
                int i;
@@ -99,8 +109,12 @@ void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing)
                        int64 move_key = HT_KEY(move_node, key64);
                        if (move_key == 0)
                                continue;
-                       node = hashtable_find(tbl, move_key, 1);
-                       node->data = move_node->data;
+                       if (move_node->data)
+                               hashtable_find(tbl, move_key, move_node->data);
+                       else {
+                               node = hashtable_find(tbl, move_key, "");
+                               node->data = 0;
+                       }
                }
 
                free(old_nodes);
@@ -155,7 +169,7 @@ void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing)
                if (nkey == key)
                        return node;
                if (nkey == 0) {
-                       if (!allocate_if_missing)
+                       if (!data_when_new)
                                return NULL;
                        break;
                }
@@ -167,6 +181,7 @@ void *hashtable_find(struct hashtable *tbl, int64 key, int allocate_if_missing)
                ((struct ht_int64_node*)node)->key = key;
        else
                node->key = (int32)key;
+       node->data = data_when_new;
        tbl->entries++;
        return node;
 }
diff --git a/hlink.c b/hlink.c
index 6c5ea61ad64a363d34331d25a2b9ac832f2440ad..29927166ab0a333ea0440df0022c6a838f3fa15f 100644 (file)
--- a/hlink.c
+++ b/hlink.c
@@ -48,6 +48,8 @@ extern struct file_list *cur_flist;
  * we can avoid the pool of dev+inode data.  For incremental recursion mode,
  * the receiver will use a ndx hash to remember old pathnames. */
 
+static void *data_when_new = "";
+
 static struct hashtable *dev_tbl;
 
 static struct hashtable *prior_hlinks;
@@ -57,32 +59,30 @@ static struct file_list *hlink_flist;
 void init_hard_links(void)
 {
        if (am_sender || protocol_version < 30)
-               dev_tbl = hashtable_create(16, 1);
+               dev_tbl = hashtable_create(16, HT_KEY64);
        else if (inc_recurse)
-               prior_hlinks = hashtable_create(1024, 0);
+               prior_hlinks = hashtable_create(1024, HT_KEY32);
 }
 
 struct ht_int64_node *idev_find(int64 dev, int64 ino)
 {
        static struct ht_int64_node *dev_node = NULL;
-       struct hashtable *tbl;
 
        /* Note that some OSes have a dev == 0, so increment to avoid storing a 0. */
        if (!dev_node || dev_node->key != dev+1) {
                /* We keep a separate hash table of inodes for every device. */
-               dev_node = hashtable_find(dev_tbl, dev+1, 1);
-               if (!(tbl = dev_node->data)) {
-                       tbl = dev_node->data = hashtable_create(512, 1);
+               dev_node = hashtable_find(dev_tbl, dev+1, data_when_new);
+               if (dev_node->data == data_when_new) {
+                       dev_node->data = hashtable_create(512, HT_KEY64);
                        if (DEBUG_GTE(HLINK, 3)) {
                                rprintf(FINFO,
                                    "[%s] created hashtable for dev %s\n",
                                    who_am_i(), big_num(dev));
                        }
                }
-       } else
-               tbl = dev_node->data;
+       }
 
-       return hashtable_find(tbl, ino, 1);
+       return hashtable_find(dev_node->data, ino, (void*)-1L);
 }
 
 void idev_destroy(void)
@@ -125,8 +125,8 @@ static void match_gnums(int32 *ndx_list, int ndx_count)
                file = hlink_flist->sorted[ndx_list[from]];
                gnum = F_HL_GNUM(file);
                if (inc_recurse) {
-                       node = hashtable_find(prior_hlinks, gnum, 1);
-                       if (!node->data) {
+                       node = hashtable_find(prior_hlinks, gnum, data_when_new);
+                       if (node->data == data_when_new) {
                                if (!(node->data = new_array0(char, 5)))
                                        out_of_memory("match_gnums");
                                assert(gnum >= hlink_flist->ndx_start);
@@ -269,7 +269,7 @@ static char *check_prior(struct file_struct *file, int gnum,
        }
 
        if (inc_recurse
-        && (node = hashtable_find(prior_hlinks, gnum, 0)) != NULL) {
+        && (node = hashtable_find(prior_hlinks, gnum, NULL)) != NULL) {
                assert(node->data != NULL);
                if (CVAL(node->data, 0) != 0) {
                        *prev_ndx_p = -1;
@@ -528,7 +528,7 @@ void finish_hard_link(struct file_struct *file, const char *fname, int fin_ndx,
 
        if (inc_recurse) {
                int gnum = F_HL_GNUM(file);
-               struct ht_int32_node *node = hashtable_find(prior_hlinks, gnum, 0);
+               struct ht_int32_node *node = hashtable_find(prior_hlinks, gnum, NULL);
                if (node == NULL) {
                        rprintf(FERROR, "Unable to find a hlink node for %d (%s)\n", gnum, f_name(file, prev_name));
                        exit_cleanup(RERR_MESSAGEIO);
diff --git a/rsync.h b/rsync.h
index 06f6d338359eed1aa6c793abb07dd1ddf65d9f12..f5350da87811b2ac0dfb55baad4c61901a6789f6 100644 (file)
--- a/rsync.h
+++ b/rsync.h
@@ -631,6 +631,9 @@ typedef unsigned int size_t;
 # define SIZEOF_INT64 SIZEOF_OFF_T
 #endif
 
+#define HT_KEY32 0
+#define HT_KEY64 1
+
 struct hashtable {
        void *nodes;
        int32 size, entries;
diff --git a/xattrs.c b/xattrs.c
index 2afa3473d6a60aca47413c1f8cd4adb5034fe052..9016aa4ce1bc4b480d0e5c8aee2a5bc5babf1158 100644 (file)
--- a/xattrs.c
+++ b/xattrs.c
@@ -415,7 +415,7 @@ static int find_matching_xattr(const item_list *xalp)
 
        key = xattr_lookup_hash(xalp);
 
-       node = hashtable_find(rsync_xal_h, key, 0);
+       node = hashtable_find(rsync_xal_h, key, NULL);
        if (node == NULL)
                return -1;
 
@@ -478,21 +478,17 @@ static int rsync_xal_store(item_list *xalp)
        new_list->key = xattr_lookup_hash(&new_list->xa_items);
 
        if (rsync_xal_h == NULL)
-               rsync_xal_h = hashtable_create(512, 1);
+               rsync_xal_h = hashtable_create(512, HT_KEY64);
        if (rsync_xal_h == NULL)
                out_of_memory("rsync_xal_h hashtable_create()");
 
-       node = hashtable_find(rsync_xal_h, new_list->key, 1);
-       if (node == NULL)
-               out_of_memory("rsync_xal_h hashtable_find()");
-
        new_ref = new0(rsync_xa_list_ref);
        if (new_ref == NULL)
                out_of_memory("new0(rsync_xa_list_ref)");
-
        new_ref->ndx = ndx;
 
-       if (node->data != NULL) {
+       node = hashtable_find(rsync_xal_h, new_list->key, new_ref);
+       if (node->data != (void*)new_ref) {
                rsync_xa_list_ref *ref = node->data;
 
                while (ref != NULL) {
@@ -504,8 +500,7 @@ static int rsync_xal_store(item_list *xalp)
                        ref->next = new_ref;
                        break;
                }
-       } else
-               node->data = new_ref;
+       }
 
        return ndx;
 }
@@ -926,7 +921,7 @@ void uncache_tmp_xattrs(void)
                        if (rsync_xal_h == NULL)
                                continue;
 
-                       node = hashtable_find(rsync_xal_h, xa_list_item->key, 0);
+                       node = hashtable_find(rsync_xal_h, xa_list_item->key, NULL);
                        if (node == NULL)
                                continue;