From a28bc3ebf672113cdd90432ef0270782d8ab6c2c Mon Sep 17 00:00:00 2001 From: Wayne Davison Date: Fri, 22 May 2020 17:40:08 -0700 Subject: [PATCH] Promoting xxhash support. --- checksum.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++ configure.ac | 14 +++++++++- options.c | 8 ++++-- rsync.yo | 5 ++-- 4 files changed, 95 insertions(+), 5 deletions(-) diff --git a/checksum.c b/checksum.c index 99c29d98..17a9507a 100644 --- a/checksum.c +++ b/checksum.c @@ -20,6 +20,9 @@ */ #include "rsync.h" +#ifdef SUPPORT_XXHASH +#include "xxhash.h" +#endif extern int am_server; extern int local_server; @@ -36,6 +39,7 @@ extern char *checksum_choice; #define CSUM_MD4_OLD 3 #define CSUM_MD4 4 #define CSUM_MD5 5 +#define CSUM_XXHASH 6 #define CSUM_SAW_BUFLEN 10 @@ -43,6 +47,9 @@ struct csum_struct { int num; const char *name; } valid_checksums[] = { +#ifdef SUPPORT_XXHASH + { CSUM_XXHASH, "xxhash" }, +#endif { CSUM_MD5, "md5" }, { CSUM_MD4, "md4" }, { CSUM_NONE, "none" }, @@ -247,6 +254,10 @@ int csum_len_for_type(int cst, BOOL flist_csum) return MD4_DIGEST_LEN; case CSUM_MD5: return MD5_DIGEST_LEN; +#ifdef SUPPORT_XXHASH + case CSUM_XXHASH: + return sizeof (XXH64_hash_t); +#endif default: /* paranoia to prevent missing case values */ exit_cleanup(RERR_UNSUPPORTED); } @@ -345,6 +356,11 @@ void get_checksum2(char *buf, int32 len, char *sum) mdfour_result(&m, (uchar *)sum); break; } +#ifdef SUPPORT_XXHASH + case CSUM_XXHASH: + SIVAL64(sum, 0, XXH64(buf, len, checksum_seed)); + break; +#endif default: /* paranoia to prevent missing case values */ exit_cleanup(RERR_UNSUPPORTED); } @@ -401,6 +417,34 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) mdfour_result(&m, (uchar *)sum); break; +#ifdef SUPPORT_XXHASH + case CSUM_XXHASH: { + XXH64_state_t* state = XXH64_createState(); + if (state == NULL) + out_of_memory("file_checksum xx64"); + + if (XXH64_reset(state, 0) == XXH_ERROR) { + rprintf(FERROR, "error resetting XXH64 seed"); + 
exit_cleanup(RERR_STREAMIO); + } + + for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) { + XXH_errorcode const updateResult = + XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK); + if (updateResult == XXH_ERROR) { + rprintf(FERROR, "error computing XX64 hash"); + exit_cleanup(RERR_STREAMIO); + } + } + remainder = (int32)(len - i); + if (remainder > 0) + XXH64_update(state, (uchar *)map_ptr(buf, i, CSUM_CHUNK), remainder); + SIVAL64(sum, 0, XXH64_digest(state)); + + XXH64_freeState(state); + break; + } +#endif default: rprintf(FERROR, "invalid checksum-choice for the --checksum option (%d)\n", checksum_type); exit_cleanup(RERR_UNSUPPORTED); @@ -413,6 +457,9 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) static int32 sumresidue; static md_context md; static int cursum_type; +#ifdef SUPPORT_XXHASH +XXH64_state_t* xxh64_state = NULL; +#endif void sum_init(int csum_type, int seed) { @@ -438,6 +485,19 @@ void sum_init(int csum_type, int seed) SIVAL(s, 0, seed); sum_update(s, 4); break; +#ifdef SUPPORT_XXHASH + case CSUM_XXHASH: + if (xxh64_state == NULL) { + xxh64_state = XXH64_createState(); + if (xxh64_state == NULL) + out_of_memory("sum_init xxh64"); + } + if (XXH64_reset(xxh64_state, 0) == XXH_ERROR) { + rprintf(FERROR, "error resetting XXH64 state"); + exit_cleanup(RERR_STREAMIO); + } + break; +#endif case CSUM_NONE: break; default: /* paranoia to prevent missing case values */ @@ -487,6 +547,14 @@ void sum_update(const char *p, int32 len) if (sumresidue) memcpy(md.buffer, p, sumresidue); break; +#ifdef SUPPORT_XXHASH + case CSUM_XXHASH: + if (XXH64_update(xxh64_state, p, len) == XXH_ERROR) { + rprintf(FERROR, "error computing XX64 hash"); + exit_cleanup(RERR_STREAMIO); + } + break; +#endif case CSUM_NONE: break; default: /* paranoia to prevent missing case values */ @@ -515,6 +583,11 @@ int sum_end(char *sum) mdfour_update(&md, (uchar *)md.buffer, sumresidue); mdfour_result(&md, (uchar *)sum); break; +#ifdef 
SUPPORT_XXHASH + case CSUM_XXHASH: + SIVAL64(sum, 0, XXH64_digest(xxh64_state)); + break; +#endif case CSUM_NONE: *sum = '\0'; break; diff --git a/configure.ac b/configure.ac index 554cf4ac..394f5b52 100644 --- a/configure.ac +++ b/configure.ac @@ -381,9 +381,21 @@ AC_CHECK_HEADERS(sys/fcntl.h sys/select.h fcntl.h sys/time.h sys/unistd.h \ netdb.h malloc.h float.h limits.h iconv.h libcharset.h langinfo.h \ sys/acl.h acl/libacl.h attr/xattr.h sys/xattr.h sys/extattr.h \ popt.h popt/popt.h linux/falloc.h netinet/in_systm.h netinet/ip.h \ - zlib.h) + zlib.h xxhash.h) AC_HEADER_MAJOR_FIXED +dnl Do you want to disable use of xxhash checksums +AC_ARG_ENABLE([xxhash], + AS_HELP_STRING([--disable-xxhash],[disable xxhash checksums])) +AH_TEMPLATE([SUPPORT_XXHASH], +[Undefine if you do not want xxhash checksums. By default this is defined.]) +if test x"$enable_xxhash" != x"no"; then + if test x"$ac_cv_header_xxhash_h" = x"yes"; then + AC_SEARCH_LIBS(XXH64_createState, xxhash) + AC_DEFINE(SUPPORT_XXHASH) + fi +fi + AC_CACHE_CHECK([if makedev takes 3 args],rsync_cv_MAKEDEV_TAKES_3_ARGS,[ AC_RUN_IFELSE([AC_LANG_SOURCE([[ #include diff --git a/options.c b/options.c index 6926f96c..959c4205 100644 --- a/options.c +++ b/options.c @@ -580,6 +580,7 @@ static void print_rsync_version(enum logcode f) char const *iconv = "no "; char const *ipv6 = "no "; char const *simd = "no "; + char const *xxhash = "no "; STRUCT_STAT *dumstat; #if SUBPROTOCOL_VERSION != 0 @@ -619,6 +620,9 @@ static void print_rsync_version(enum logcode f) #ifdef HAVE_SIMD simd = ""; #endif +#ifdef SUPPORT_XXHASH + xxhash = ""; +#endif rprintf(f, "%s version %s protocol version %d%s\n", RSYNC_NAME, RSYNC_VERSION, PROTOCOL_VERSION, subprotocol); @@ -632,8 +636,8 @@ static void print_rsync_version(enum logcode f) (int)(sizeof (int64) * 8)); rprintf(f, " %ssocketpairs, %shardlinks, %ssymlinks, %sIPv6, batchfiles, %sinplace,\n", got_socketpair, hardlinks, links, ipv6, have_inplace); - rprintf(f, " %sappend, %sACLs, 
%sxattrs, %siconv, %ssymtimes, %sprealloc, %sSIMD\n", - have_inplace, acls, xattrs, iconv, symtimes, prealloc, simd); + rprintf(f, " %sappend, %sACLs, %sxattrs, %siconv, %ssymtimes, %sprealloc, %sSIMD, %sxxhash\n", + have_inplace, acls, xattrs, iconv, symtimes, prealloc, simd, xxhash); #ifdef MAINTAINER_MODE rprintf(f, "Panic Action: \"%s\"\n", get_panic_action()); diff --git a/rsync.yo b/rsync.yo index a1917cf6..a7e36423 100644 --- a/rsync.yo +++ b/rsync.yo @@ -1376,7 +1376,7 @@ and (assuming bf(--checksum) is specified) the pre-transfer checksums. If two comma-separated names are supplied, the first name affects the transfer checksums, and the second name affects the pre-transfer checksums (bf(-c)). -The algorithm choices are "auto", "MD5", "MD4", and "none". +The algorithm choices are "auto", "xxhash", "MD5", "MD4", and "none". If "none" is specified for the first (or only) name, the bf(--whole-file) option is forced on and no checksum verification is performed on the transferred data. @@ -1389,7 +1389,8 @@ negotation between the client and the server as follows: If both the client and the server are at least version 3.2.0, they will exchange a list of checksum names and choose the first one in the list that they have in common. -This typically means that they will choose MD5. +This typically means that they will choose xxhash if they both support it +and fall back to MD5 otherwise. If one side of the transfer is not new enough to support this checksum negotation, then a value is chosen based on the protocol version (which chooses between MD5 and various flavors of MD4 based on protocol age). -- 2.34.1