Skip to content

Commit c55ada9

Browse files
BillyONeal authored and ras0219-msft committed
Add branchless is_alnum borrowed from MSVC++'s std::regex's _Is_word; should be about 5x faster. (microsoft#823)
The _Is_word change produced the following results in microbenchmarks; the previous is_alnum looks like the branching_ranges case.

.\word_character_test.exe
08/01/18 16:33:03
Running .\word_character_test.exe
Run on (12 X 2904 MHz CPU s)
CPU Caches:
  L1 Data 32K (x6)
  L1 Instruction 32K (x6)
  L2 Unified 262K (x6)
  L3 Unified 12582K (x1)
--------------------------------------------------------
Benchmark                 Time               CPU     Iterations
--------------------------------------------------------
strchr_search      19426572900 ns    19421875000 ns           1
branching_ranges    7582129000 ns     7578125000 ns           1
branching_search    6592977800 ns     6593750000 ns           1
table_index         1091321300 ns     1078125000 ns           1
1 parent 35f721d commit c55ada9

File tree

1 file changed

+39
-4
lines changed

1 file changed

+39
-4
lines changed

Release/include/cpprest/asyncrt_utils.h

Lines changed: 39 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@
1919
#include <system_error>
2020
#include <random>
2121
#include <locale.h>
22+
#include <limits.h>
2223
#include "pplx/pplxtasks.h"
2324
#include "cpprest/details/basic_types.h"
2425

@@ -356,11 +357,45 @@ namespace details
356357
/// Our own implementation of alpha numeric instead of std::isalnum to avoid
357358
/// taking global lock for performance reasons.
358359
/// </summary>
/// <param name="uch">Character to classify, given as an unsigned byte value.</param>
/// <returns>true if uch is '0'-'9', 'A'-'Z' or 'a'-'z'; false for every other value.</returns>
inline bool __cdecl is_alnum(const unsigned char uch) noexcept
{
    // Branch-free classification: one flag per possible unsigned char value,
    // indexed directly by the character. The table has UCHAR_MAX + 1 entries,
    // so every unsigned char is a valid index.
    static constexpr bool is_alnum_table[UCHAR_MAX + 1] =
    {
        /*        X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 XA XB XC XD XE XF */
        /* 0X */   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 1X */   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 2X */   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        /* 3X */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, /* 0-9 */
        /* 4X */   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* A-Z */
        /* 5X */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
        /* 6X */   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* a-z */
        /* 7X */   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
        /* non-ASCII values (0x80 and above) are value-initialized to 0 */
    };

    return is_alnum_table[uch];
}

/// <summary>
/// Our own implementation of alpha numeric instead of std::isalnum to avoid
/// taking global lock for performance reasons.
/// </summary>
/// <param name="ch">Character to classify.</param>
/// <returns>true if ch is '0'-'9', 'A'-'Z' or 'a'-'z'; false for every other value.</returns>
inline bool __cdecl is_alnum(const char ch) noexcept
{
    // Convert to unsigned char before the table lookup so that a negative
    // char value cannot produce a negative index.
    return is_alnum(static_cast<unsigned char>(ch));
}

/// <summary>
/// Our own implementation of alpha numeric instead of std::isalnum to avoid
/// taking global lock for performance reasons.
/// </summary>
/// <param name="ch">Character or code unit to classify; Elem must be a type accepted by std::make_unsigned.</param>
/// <returns>true if ch is '0'-'9', 'A'-'Z' or 'a'-'z'; false for every other value.</returns>
template<class Elem>
inline bool __cdecl is_alnum(Elem ch) noexcept
{
    // assumes 'x' == L'x' for the ASCII range
    using UElem = typename std::make_unsigned<Elem>::type;
    const auto uch = static_cast<UElem>(ch);

    // The range check must come first: it rejects every value above 'z', so the
    // narrowing cast below can never fold a wide value onto an ASCII letter or digit.
    return uch <= static_cast<UElem>('z') && is_alnum(static_cast<unsigned char>(uch));
}
365400

366401
/// <summary>

0 commit comments

Comments
 (0)