From a5600f88a64eee6b0f8a4a3ce3d440af6fbd99cb Mon Sep 17 00:00:00 2001 From: Severin Leonhardt Date: Fri, 2 Oct 2015 12:52:58 +0200 Subject: [PATCH 1/3] Replace LZCNT with BSR instruction in hdr_histogram The LZCNT instruction produced by __lzcnt on Windows is not available on many CPUs in use today. BSR is a safe replacement requiring only minimal additional work. --- .../hdr_histogram/hdr_histogram.cpp | 31 ++++++++++++++----- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/src/third_party/hdr_histogram/hdr_histogram.cpp b/src/third_party/hdr_histogram/hdr_histogram.cpp index 521fea0e2..477cc919b 100644 --- a/src/third_party/hdr_histogram/hdr_histogram.cpp +++ b/src/third_party/hdr_histogram/hdr_histogram.cpp @@ -23,21 +23,36 @@ #include "hdr_histogram.hpp" -inline int32_t hdr_clz64(uint64_t x) { +/** + * Get the most significant bit set. + * + * The result is a one-based index, zero means no bits are set. + */ +inline int32_t hdr_bsr64(uint64_t x) { #if defined(_MSC_VER) + unsigned long index; # if defined(_M_AMD64) - return (int32_t)__lzcnt64(x); + char isNonzero = _BitScanReverse64(&index, x); + if (isNonzero) + return index + 1; + else + return 0; # else // On 32-bit this needs to be split into two operations - int32_t lz = (int32_t)__lzcnt((unsigned long)(x >> 32)); - if (lz == 32) { + const char isNonzero = _BitScanReverse(&index, (unsigned long)(x >> 32)); + if (isNonzero) + return index + 32 + 1; + else { // Scan the last 32 bits by truncating the 64-bit value - return (int32_t)__lzcnt((unsigned long)x) + 32; + isNonzero = _BitScanReverse(&index, (unsigned long)x); + if (isNonzero) + return index + 1; + else + return 0; } - return lz; # endif #else - return (int32_t)__builtin_clzll(x); + return 64 - (int32_t)__builtin_clzll(x); #endif } @@ -132,7 +147,7 @@ static int64_t power(int64_t base, int64_t exp) static int32_t get_bucket_index(struct hdr_histogram* h, int64_t value) { - int32_t pow2ceiling = 64 - hdr_clz64(value | h->sub_bucket_mask); // smallest power of 2 containing value + int32_t pow2ceiling = hdr_bsr64(value | h->sub_bucket_mask); // smallest power of 2 containing value return pow2ceiling - (int32_t)h->unit_magnitude - (h->sub_bucket_half_count_magnitude + 1); } From f4b75d1831bf4929dbba4d754ba38e06858aa6a1 Mon Sep 17 00:00:00 2001 From: Severin Leonhardt Date: Tue, 6 Oct 2015 09:31:15 +0200 Subject: [PATCH 2/3] Fix constness in hdr_histogram's hdr_bsr64 --- src/third_party/hdr_histogram/hdr_histogram.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/third_party/hdr_histogram/hdr_histogram.cpp b/src/third_party/hdr_histogram/hdr_histogram.cpp index 477cc919b..7525f75d1 100644 --- a/src/third_party/hdr_histogram/hdr_histogram.cpp +++ b/src/third_party/hdr_histogram/hdr_histogram.cpp @@ -32,14 +32,14 @@ inline int32_t hdr_bsr64(uint64_t x) { #if defined(_MSC_VER) unsigned long index; # if defined(_M_AMD64) - char isNonzero = _BitScanReverse64(&index, x); + const char isNonzero = _BitScanReverse64(&index, x); if (isNonzero) return index + 1; else return 0; # else // On 32-bit this needs to be split into two operations - const char isNonzero = _BitScanReverse(&index, (unsigned long)(x >> 32)); + char isNonzero = _BitScanReverse(&index, (unsigned long)(x >> 32)); if (isNonzero) return index + 32 + 1; else { From 0d95364273709652299fac2dd336019f31f26cd2 Mon Sep 17 00:00:00 2001 From: Severin Leonhardt Date: Tue, 6 Oct 2015 10:19:28 +0200 Subject: [PATCH 3/3] Check for 0 before calling __builtin_clzll __builtin_clzll is undefined for 0 so this case must be handled separately. --- src/third_party/hdr_histogram/hdr_histogram.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/third_party/hdr_histogram/hdr_histogram.cpp b/src/third_party/hdr_histogram/hdr_histogram.cpp index 7525f75d1..2bd1ebc2f 100644 --- a/src/third_party/hdr_histogram/hdr_histogram.cpp +++ b/src/third_party/hdr_histogram/hdr_histogram.cpp @@ -52,7 +52,10 @@ inline int32_t hdr_bsr64(uint64_t x) { } # endif #else - return 64 - (int32_t)__builtin_clzll(x); + if (x == 0) + return 0; + else + return 64 - (int32_t)__builtin_clzll(x); #endif }