From 302cb2a886f494b896730d8b9f4c632d269c9b0b Mon Sep 17 00:00:00 2001 From: Josiah Carlson Date: Wed, 24 Apr 2024 15:38:19 -0700 Subject: [PATCH] Address review comments Signed-off-by: Josiah Carlson --- README.md | 11 ++++++++--- src/Makefile | 6 +++--- src/crc64.c | 1 + src/crccombine.c | 27 ++++++++++++++------------- 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index bff8be5d9..b07e0d772 100644 --- a/README.md +++ b/README.md @@ -54,10 +54,15 @@ installed): % ./utils/gen-test-certs.sh % ./runtest --tls -To build and test crc 64 performance improvements: +To build and and run internal server tests (otherwise Valkey will complain +about missing "test" configuration): + + % make distclean && make SEVER_TEST=yes + % src/valkey-server test [all|crc64|dict|...] + +Which is also required to test a range of CRC64 cutoffs for your CPU: + % src/valkey-server test crc64 --crc 10000000 - % make distclean && make SERVER_TEST=yes - % src/valkey-server test crc64 --crc 10000000 Fixing build problems with dependencies or cached build options --------- diff --git a/src/Makefile b/src/Makefile index a10ff1d5d..acdae9d40 100644 --- a/src/Makefile +++ b/src/Makefile @@ -36,9 +36,6 @@ NODEPS:=clean distclean # Default settings STD=-pedantic -DSERVER_STATIC='' -ifeq ($(SERVER_TEST),yes) - STD +=-DSERVER_TEST=1 -endif # Use -Wno-c11-extensions on clang, either where explicitly used or on # platforms we can assume it's being used. @@ -134,6 +131,9 @@ ifdef REDIS_LDFLAGS endif FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(SERVER_CFLAGS) +ifeq ($(SERVER_TEST),yes) + FINAL_CFLAGS +=-DSERVER_TEST=1 +endif FINAL_LDFLAGS=$(LDFLAGS) $(OPT) $(SERVER_LDFLAGS) $(DEBUG) FINAL_LIBS=-lm DEBUG=-g -ggdb diff --git a/src/crc64.c b/src/crc64.c index 9df29aaaa..26faa173b 100644 --- a/src/crc64.c +++ b/src/crc64.c @@ -75,6 +75,7 @@ static inline uint_fast64_t crc_reflect(uint_fast64_t data, size_t data_len) { * 16-30x 64-bit operations, no comparisons (16 for native byteswap, 30 for pure C) */ + ASSERT(data_len <= 64); /* swap odd and even bits */ data = ((data >> 1) & 0x5555555555555555ULL) | ((data & 0x5555555555555555ULL) << 1); /* swap consecutive pairs */ diff --git a/src/crccombine.c b/src/crccombine.c index 8aab7ab34..5fcb693fb 100644 --- a/src/crccombine.c +++ b/src/crccombine.c @@ -5,12 +5,16 @@ #include "crccombine.h" /* Copyright (C) 2013 Mark Adler - * Copyright (C) 2019-2023 Josiah Carlson + * Copyright (C) 2019-2024 Josiah Carlson * Portions originally from: crc64.c Version 1.4 16 Dec 2013 Mark Adler * Modifications by Josiah Carlson * - Added implementation variations with sample timings for gf_matrix_times*() * - Most folks would be best using gf2_matrix_times_vec or * gf2_matrix_times_vec2, unless some processor does AVX2 fast. + * - This is the implementation of the MERGE_CRC macro defined in + * crcspeed.c (which calls crc_combine()), and is a specialization of the + * generic crc_combine() (and related from the 2013 edition of Mark Adler's + * crc64.c)) for the sake of clarity and performance. This software is provided 'as-is', without any express or implied warranty. In no event will the author be held liable for any damages @@ -39,15 +43,6 @@ from repeating the same information over and over. */ -/* this table allows us to eliminate conditions during gf2_matrix_times_vec2() */ -#define MASKS_2() \ - static v2uq masks2[4] = { \ - {0,0}, \ - {-1,0}, \ - {0,-1}, \ - {-1,-1}, \ - } - uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec) { /* * Uses xmm registers on x86, works basically everywhere fast, doing @@ -58,7 +53,13 @@ uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec) { */ v2uq sum = {0, 0}, *mv2 = (v2uq*)mat; - MASKS_2(); + /* this table allows us to eliminate conditions during gf2_matrix_times_vec2() */ + static v2uq masks2[4] = { + {0,0}, + {-1,0}, + {0,-1}, + {-1,-1}, + }; /* Almost as beautiful as gf2_matrix_times_vec, but only half as many * bits per step, so we need 2 per chunk4 operation. Faster in my tests. */ @@ -87,7 +88,6 @@ uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec) { #undef DO_CHUNK16 #undef DO_CHUNK4 -#undef MASKS_2 static void gf2_matrix_square(uint64_t *square, uint64_t *mat, uint8_t dim) { @@ -106,7 +106,8 @@ static uint64_t combine_cache[64][64]; * okay to spend the 32k of memory here, leaving the algorithm unchanged from * as it was a decade ago, and be happy that it costs <200 microseconds to * init, and that subsequent calls to the combine function take under 100 - * nanoseconds. + * nanoseconds. We also note that the crcany/crc.c code applies to any CRC, and + * we are currently targeting one: Jones CRC64. */ void init_combine_cache(uint64_t poly, uint8_t dim) {