valkey-io · lipzhu · Apr 11, 2024 · Apr 15, 2024 · Apr 22, 2024 · madolson
diff --git a/src/bio.c b/src/bio.c
@@ -215,7 +215,7 @@ void *bioProcessBackgroundJobs(void *arg) {
     serverSetCpuAffinity(server.bio_cpulist);
 
     makeThreadKillable();
-
+    zmalloc_register_thread_index();
     pthread_mutex_lock(&bio_mutex[worker]);
     /* Block SIGALRM so we are sure that only the main thread will
      * receive the watchdog signal. */

diff --git a/src/networking.c b/src/networking.c
@@ -4228,7 +4228,7 @@ void *IOThreadMain(void *myid) {
     redis_set_thread_title(thdname);
     serverSetCpuAffinity(server.server_cpulist);
     makeThreadKillable();
-
+    zmalloc_register_thread_index();
     while(1) {
         /* Wait for start */
         for (int j = 0; j < 1000000; j++) {

diff --git a/src/server.c b/src/server.c
@@ -6950,6 +6950,7 @@ int main(int argc, char **argv) {
     spt_init(argc, argv);
 #endif
     tzset(); /* Populates 'timezone' global. */
+    zmalloc_register_thread_index();
     zmalloc_set_oom_handler(serverOutOfMemoryHandler);
 
     /* To achieve entropy, in case of containers, their time() and getpid() can

diff --git a/src/zmalloc.c b/src/zmalloc.c
@@ -87,10 +87,29 @@ void zlibc_free(void *ptr) {
 #define dallocx(ptr,flags) je_dallocx(ptr,flags)
 #endif
 
-#define update_zmalloc_stat_alloc(__n) atomicIncr(used_memory,(__n))
-#define update_zmalloc_stat_free(__n) atomicDecr(used_memory,(__n))
+#define update_zmalloc_stat_alloc(__n) used_memory_tls[thread_index] += (__n)
+#define update_zmalloc_stat_free(__n) used_memory_tls[thread_index] -= (__n)
 
-static serverAtomic size_t used_memory = 0;
+/* Index of the calling thread's slot in the used_memory_tls array. It is
+ * assigned by zmalloc_register_thread_index(); as a static thread-local it
+ * is zero-initialized, so a thread that never registers uses slot 0. */
+static __thread int thread_index;
+/* MAX_THREADS_NUM = IO_THREADS_MAX_NUM(128) + BIO threads(3) + main thread(1). */
+#define MAX_THREADS_NUM (IO_THREADS_MAX_NUM + BIO_THREAD_COUNT + 1)
+/* Per-thread used-memory counters, indexed by thread_index. */
+static size_t used_memory_tls[MAX_THREADS_NUM];
+
+/* Number of threads that have registered so far; also the next free slot. */
+static serverAtomic int total_active_threads;
+
+/* Assign the calling thread the next free slot in used_memory_tls. Called
+ * at the start of each thread's routine. */
+void zmalloc_register_thread_index(void) {
+    atomicGetIncr(total_active_threads, thread_index, 1);
+    /* Validate the slot this thread was actually handed, rather than
+     * re-reading the shared counter that other threads may be changing. */
+    serverAssert(thread_index < MAX_THREADS_NUM);
+}
 
 static void zmalloc_default_oom(size_t size) {
     fprintf(stderr, "zmalloc: Out of memory trying to allocate %zu bytes\n",
@@ -409,8 +421,18 @@ char *zstrdup(const char *s) {
 }
 
 size_t zmalloc_used_memory(void) {
-    size_t um;
-    atomicGet(used_memory,um);
+    /* Sum the per-thread counters of every registered thread. The slots are
+     * plain size_t values read without synchronization, so the result is a
+     * best-effort snapshot rather than an exact figure. */
+    size_t um = 0;
+    int active_threads;
+    /* Load the shared counter once instead of re-reading it non-atomically
+     * on each iteration. */
+    atomicGet(total_active_threads, active_threads);
+    serverAssert(active_threads <= MAX_THREADS_NUM);
+    for (int i = 0; i < active_threads; i++) {
+        um += used_memory_tls[i];
+    }
     return um;
 }
 

diff --git a/src/zmalloc.h b/src/zmalloc.h
@@ -138,6 +138,7 @@ size_t zmalloc_get_memory_size(void);
 void zlibc_free(void *ptr);
 void zlibc_trim(void);
 void zmadvise_dontneed(void *ptr);
+void zmalloc_register_thread_index(void);
 
 #ifdef HAVE_DEFRAG
 void zfree_no_tcache(void *ptr);