Remove bypassed checksums in --inplace to improve speed.
author Wayne Davison <wayned@samba.org>
Sat, 3 Aug 2013 16:44:13 +0000 (09:44 -0700)
committer Wayne Davison <wayned@samba.org>
Sat, 3 Aug 2013 16:59:38 +0000 (09:59 -0700)
When we encounter a checksum that refers to a part of an --inplace file that
has already been overwritten without SUMFLG_SAME_OFFSET getting set, we now
remove that checksum from the hash chain.  This speeds up the transfer of
files that have a lot of identical checksum blocks (e.g. sequences of zeros)
that we can't use because they never get marked as being at the same offset.
Patch provided by Michael Chapman.
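
For reference, here is a minimal standalone sketch of the chain-pruning
pattern the patch applies in hash_search().  This is not rsync code: the
struct entry, prune_bucket(), and the "usable" field are illustrative
stand-ins for the sum table, the chain walk, and the SUMFLG_SAME_OFFSET
test.  Entries live in an array, each hash bucket holds the index of its
first entry, each entry's chain field holds the next index (-1 terminates),
and a trailing "prev" pointer lets an unusable entry be unlinked with a
single store as the chain is walked.

	#include <stdio.h>

	#define CHAIN_END (-1)

	struct entry {
		long offset;	/* where this block sits in the basis file */
		int  usable;	/* stand-in for the SUMFLG_SAME_OFFSET test */
		int  chain;	/* index of next entry in the bucket, or -1 */
	};

	/* Walk one hash bucket and unlink every entry that lies before the
	 * current offset and is not flagged as usable.  "prev" always points
	 * at the link that led us to entry i, so unlinking is one store. */
	static void prune_bucket(int *bucket_head, struct entry *tab, long cur_offset)
	{
		int *prev = bucket_head;
		int i = *bucket_head;

		while (i != CHAIN_END) {
			int next = tab[i].chain;
			if (tab[i].offset < cur_offset && !tab[i].usable)
				*prev = next;		/* bypass entry i */
			else
				prev = &tab[i].chain;	/* keep it, advance prev */
			i = next;
		}
	}

	int main(void)
	{
		struct entry tab[] = {
			{ 0, 0, 1 },		/* stale, unusable -> pruned */
			{ 8, 1, 2 },		/* stale but usable -> kept  */
			{ 16, 0, 3 },		/* at/after offset -> kept   */
			{ 4, 0, CHAIN_END },	/* stale, unusable -> pruned */
		};
		int head = 0;

		prune_bucket(&head, tab, 16);

		for (int i = head; i != CHAIN_END; i = tab[i].chain)
			printf("kept entry %d (offset %ld)\n", i, tab[i].offset);
		return 0;
	}

The patch uses the same idea, except that "prev" starts out pointing at the
hash_table[] slot and then tracks the previous sum's chain field, so a
bypassed checksum never has to be skipped again on later passes over the
same bucket.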

NEWS
match.c

diff --git a/NEWS b/NEWS
index 040ac2d756776d7e3d1584a6647ea06238d300bd..eec631d373dd9fe957919a591f268a3549267fa7 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -154,6 +154,9 @@ Changes since 3.0.9:
       file for one way to package the resulting files.  (Suggestions for
       how to make this even easier to install & use are welcomed.)
 
+    - Improved the speed of some --inplace updates when there are lots of
+      identical checksum blocks that end up being unusable.
+
     - Added the --outbuf=N|L|B option for choosing the output buffering.
 
     - Repeating the --fuzzy option now causes the code to look for fuzzy matches
diff --git a/match.c b/match.c
index bafab9f35292445c9fb20ccea789013eef375a87..a8bd1f304f44983925c1c8b5b1664b0c7fbf8192 100644 (file)
--- a/match.c
+++ b/match.c
@@ -178,7 +178,8 @@ static void hash_search(int f,struct sum_struct *s,
 
        do {
                int done_csum2 = 0;
-               int32 i;
+               uint32 hash_entry;
+               int32 i, *prev;
 
                if (DEBUG_GTE(DELTASUM, 4)) {
                        rprintf(FINFO, "offset=%s sum=%04x%04x\n",
@@ -186,19 +187,32 @@ static void hash_search(int f,struct sum_struct *s,
                }
 
                if (tablesize == TRADITIONAL_TABLESIZE) {
-                       if ((i = hash_table[SUM2HASH2(s1,s2)]) < 0)
+                       hash_entry = SUM2HASH2(s1,s2);
+                       if ((i = hash_table[hash_entry]) < 0)
                                goto null_hash;
                        sum = (s1 & 0xffff) | (s2 << 16);
                } else {
                        sum = (s1 & 0xffff) | (s2 << 16);
-                       if ((i = hash_table[BIG_SUM2HASH(sum)]) < 0)
+                       hash_entry = BIG_SUM2HASH(sum);
+                       if ((i = hash_table[hash_entry]) < 0)
                                goto null_hash;
                }
+               prev = &hash_table[hash_entry];
 
                hash_hits++;
                do {
                        int32 l;
 
+                       /* When updating in-place, the chunk's offset must be
+                        * either >= our offset or identical data at that offset.
+                        * Remove any bypassed entries that we can never use. */
+                       if (updating_basis_file && s->sums[i].offset < offset
+                           && !(s->sums[i].flags & SUMFLG_SAME_OFFSET)) {
+                               *prev = s->sums[i].chain;
+                               continue;
+                       }
+                       prev = &s->sums[i].chain;
+
                        if (sum != s->sums[i].sum1)
                                continue;
 
@@ -207,12 +221,6 @@ static void hash_search(int f,struct sum_struct *s,
                        if (l != s->sums[i].len)
                                continue;
 
-                       /* in-place: ensure chunk's offset is either >= our
-                        * offset or that the data didn't move. */
-                       if (updating_basis_file && s->sums[i].offset < offset
-                           && !(s->sums[i].flags & SUMFLG_SAME_OFFSET))
-                               continue;
-
                        if (DEBUG_GTE(DELTASUM, 3)) {
                                rprintf(FINFO,
                                        "potential match at %s i=%ld sum=%08x\n",