Skip to content

Commit

Permalink
Address review comments
Browse files Browse the repository at this point in the history
Signed-off-by: Josiah Carlson <josiah.carlson@gmail.com>
  • Loading branch information
josiahcarlson committed Apr 24, 2024
1 parent 09cae3b commit 302cb2a
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 19 deletions.
11 changes: 8 additions & 3 deletions README.md
Expand Up @@ -54,10 +54,15 @@ installed):
% ./utils/gen-test-certs.sh
% ./runtest --tls

To build and test crc 64 performance improvements:
To build and and run internal server tests (otherwise Valkey will complain
about missing "test" configuration):

% make distclean && make SEVER_TEST=yes
% src/valkey-server test [all|crc64|dict|...]

Which is also required to test a range of CRC64 cutoffs for your CPU:
% src/valkey-server test crc64 --crc 10000000

% make distclean && make SERVER_TEST=yes
% src/valkey-server test crc64 --crc 10000000

Fixing build problems with dependencies or cached build options
---------
Expand Down
6 changes: 3 additions & 3 deletions src/Makefile
Expand Up @@ -36,9 +36,6 @@ NODEPS:=clean distclean

# Default settings
STD=-pedantic -DSERVER_STATIC=''
ifeq ($(SERVER_TEST),yes)
STD +=-DSERVER_TEST=1
endif

# Use -Wno-c11-extensions on clang, either where explicitly used or on
# platforms we can assume it's being used.
Expand Down Expand Up @@ -134,6 +131,9 @@ ifdef REDIS_LDFLAGS
endif

FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(SERVER_CFLAGS)
ifeq ($(SERVER_TEST),yes)
FINAL_CFLAGS +=-DSERVER_TEST=1
endif
FINAL_LDFLAGS=$(LDFLAGS) $(OPT) $(SERVER_LDFLAGS) $(DEBUG)
FINAL_LIBS=-lm
DEBUG=-g -ggdb
Expand Down
1 change: 1 addition & 0 deletions src/crc64.c
Expand Up @@ -75,6 +75,7 @@ static inline uint_fast64_t crc_reflect(uint_fast64_t data, size_t data_len) {
* 16-30x 64-bit operations, no comparisons (16 for native byteswap, 30 for pure C)
*/

ASSERT(data_len <= 64);
/* swap odd and even bits */
data = ((data >> 1) & 0x5555555555555555ULL) | ((data & 0x5555555555555555ULL) << 1);
/* swap consecutive pairs */
Expand Down
27 changes: 14 additions & 13 deletions src/crccombine.c
Expand Up @@ -5,12 +5,16 @@
#include "crccombine.h"

/* Copyright (C) 2013 Mark Adler
* Copyright (C) 2019-2023 Josiah Carlson
* Copyright (C) 2019-2024 Josiah Carlson
* Portions originally from: crc64.c Version 1.4 16 Dec 2013 Mark Adler
* Modifications by Josiah Carlson <josiah.carlson@gmail.com>
* - Added implementation variations with sample timings for gf_matrix_times*()
* - Most folks would be best using gf2_matrix_times_vec or
* gf2_matrix_times_vec2, unless some processor does AVX2 fast.
* - This is the implementation of the MERGE_CRC macro defined in
* crcspeed.c (which calls crc_combine()), and is a specialization of the
* generic crc_combine() (and related from the 2013 edition of Mark Adler's
* crc64.c)) for the sake of clarity and performance.
This software is provided 'as-is', without any express or implied
warranty. In no event will the author be held liable for any damages
Expand Down Expand Up @@ -39,15 +43,6 @@
from repeating the same information over and over.
*/

/* this table allows us to eliminate conditions during gf2_matrix_times_vec2() */
#define MASKS_2() \
static v2uq masks2[4] = { \
{0,0}, \
{-1,0}, \
{0,-1}, \
{-1,-1}, \
}

uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec) {
/*
* Uses xmm registers on x86, works basically everywhere fast, doing
Expand All @@ -58,7 +53,13 @@ uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec) {
*/
v2uq sum = {0, 0},
*mv2 = (v2uq*)mat;
MASKS_2();
/* this table allows us to eliminate conditions during gf2_matrix_times_vec2() */
static v2uq masks2[4] = {
{0,0},
{-1,0},
{0,-1},
{-1,-1},
};

/* Almost as beautiful as gf2_matrix_times_vec, but only half as many
* bits per step, so we need 2 per chunk4 operation. Faster in my tests. */
Expand Down Expand Up @@ -87,7 +88,6 @@ uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec) {

#undef DO_CHUNK16
#undef DO_CHUNK4
#undef MASKS_2


static void gf2_matrix_square(uint64_t *square, uint64_t *mat, uint8_t dim) {
Expand All @@ -106,7 +106,8 @@ static uint64_t combine_cache[64][64];
* okay to spend the 32k of memory here, leaving the algorithm unchanged from
* as it was a decade ago, and be happy that it costs <200 microseconds to
* init, and that subsequent calls to the combine function take under 100
* nanoseconds.
* nanoseconds. We also note that the crcany/crc.c code applies to any CRC, and
* we are currently targeting one: Jones CRC64.
*/

void init_combine_cache(uint64_t poly, uint8_t dim) {
Expand Down

0 comments on commit 302cb2a

Please sign in to comment.