Ticket #8509: uuid_operators.cpp

File uuid_operators.cpp, 5.8 KB (added by Andrey Semashev, 9 years ago)

Benchmarking code

Line 
1#include <time.h>
2#include <cstring>
3#include <memory>
4#include <iomanip>
5#include <iostream>
6#include <algorithm>
7#include <boost/config.hpp>
8#include <boost/uuid/uuid.hpp>
9#include <boost/uuid/uuid_generators.hpp>
10#if defined(BOOST_WINDOWS)
11#define WIN32_LEAN_AND_MEAN
12#include <windows.h>
13#endif
14#if defined(_MSC_VER)
15// MSVC does not always have immintrin.h (at least, not up to MSVC 10), and we cannot detect the target higher than SSE2 anyway, so just include this header
16#include <emmintrin.h>
17#else
18#include <immintrin.h>
19#endif
20
21using boost::uuids::uuid;
22
23bool stock_equal(uuid const& left, uuid const& right)
24{
25 return std::equal(left.begin(), left.end(), right.begin());
26}
27
28bool mem_equal(uuid const& left, uuid const& right)
29{
30 return std::memcmp(left.data, right.data, 16) == 0;
31}
32
33bool simd_equal(uuid const& left, uuid const& right)
34{
35#if defined(__SSE3__)
36 __m128i mm_left = _mm_lddqu_si128(reinterpret_cast< const __m128i* >(left.data));
37 __m128i mm_right = _mm_lddqu_si128(reinterpret_cast< const __m128i* >(right.data));
38#else
39 __m128i mm_left = _mm_loadu_si128(reinterpret_cast< const __m128i* >(left.data));
40 __m128i mm_right = _mm_loadu_si128(reinterpret_cast< const __m128i* >(right.data));
41#endif
42 __m128i mm_cmp = _mm_cmpeq_epi32(mm_left, mm_right);
43#if defined(__SSE4_1__)
44 return _mm_test_all_ones(mm_cmp);
45#else
46 return _mm_movemask_epi8(mm_cmp) == 0xFFFF;
47#endif
48}
49
50
51bool stock_less(uuid const& left, uuid const& right)
52{
53 return std::lexicographical_compare(left.begin(), left.end(), right.begin(), right.end());
54}
55
56bool mem_less(uuid const& left, uuid const& right)
57{
58 return std::memcmp(left.data, right.data, 16) < 0;
59}
60
61bool simd_less(uuid const& left, uuid const& right)
62{
63#if defined(__SSE3__)
64 __m128i mm_left = _mm_lddqu_si128(reinterpret_cast< const __m128i* >(left.data));
65 __m128i mm_right = _mm_lddqu_si128(reinterpret_cast< const __m128i* >(right.data));
66#else
67 __m128i mm_left = _mm_loadu_si128(reinterpret_cast< const __m128i* >(left.data));
68 __m128i mm_right = _mm_loadu_si128(reinterpret_cast< const __m128i* >(right.data));
69#endif
70
71 // To emulate lexicographical_compare behavior we have to perform two comparisons - the forward and reverse one.
72 // Then we know which bytes are equivalent and which ones are different, and for those different the comparison results
73 // will be opposite. Then we'll be able to find the first differing comparison result (for both forward and reverse ways),
74 // and depending on which way it is for, this will be the result of the operation. There are a few notes to consider:
75 //
76 // 1. Due to little endian byte order the first bytes go into the lower part of the xmm registers,
77 // so the comparison results in the least significant bits will actually be the most signigicant for the final operation result.
78 // This means we have to determine which of the comparison results have the least significant bit on, and this is achieved with
79 // the "(x - 1) ^ x" trick.
80 // 2. Because there is only signed comparison in SSE/AVX, we have to operate on 16 bit integers. We zero-extend bytes to 16 bit words
81 // before comparison and then pack the comparison results back to 8 bits. For AVX2 we can still do the comparison in one instruction though.
82 // 3. pcmpgtw compares for "greater" relation, so we swap the arguments to get what we need.
83 const __m128i mm_0 = _mm_setzero_si128();
84 __m128i mm_left_lo = _mm_unpacklo_epi8(mm_left, mm_0), mm_right_lo = _mm_unpacklo_epi8(mm_right, mm_0);
85 __m128i mm_left_hi = _mm_unpackhi_epi8(mm_left, mm_0), mm_right_hi = _mm_unpackhi_epi8(mm_right, mm_0);
86
87 __m128i mm_cmp_lo = _mm_cmpgt_epi16(mm_right_lo, mm_left_lo), mm_cmp_hi = _mm_cmpgt_epi16(mm_right_hi, mm_left_hi);
88 __m128i mm_rcmp_lo = _mm_cmpgt_epi16(mm_left_lo, mm_right_lo), mm_rcmp_hi = _mm_cmpgt_epi16(mm_left_hi, mm_right_hi);
89 __m128i mm_cmp = _mm_packs_epi16(mm_cmp_lo, mm_cmp_hi);
90 __m128i mm_rcmp = _mm_packs_epi16(mm_rcmp_lo, mm_rcmp_hi);
91
92 boost::uint32_t cmp = _mm_movemask_epi8(mm_cmp), rcmp = _mm_movemask_epi8(mm_rcmp);
93 cmp = (cmp - 1u) ^ cmp;
94 rcmp = (rcmp - 1u) ^ rcmp;
95
96 return static_cast< boost::uint16_t >(cmp) < static_cast< boost::uint16_t >(rcmp);
97}
98
99typedef bool equal_t(uuid const& left, uuid const& right);
100
101const unsigned int iterations = 100000000u;
102
103void test_performance(uuid const& left, uuid const& right, const char* name, equal_t* eq)
104{
105#if !defined(BOOST_WINDOWS)
106 struct timespec start = {}, end = {};
107 clock_gettime(CLOCK_REALTIME, &start);
108#else
109 LARGE_INTEGER start = {}, end = {};
110 QueryPerformanceCounter(&start);
111#endif
112
113 for (volatile unsigned int i = 0; i < iterations; ++i)
114 {
115 eq(left, right);
116 }
117
118#if !defined(BOOST_WINDOWS)
119 clock_gettime(CLOCK_REALTIME, &end);
120
121 boost::uint64_t duration = (end.tv_sec - start.tv_sec) * 1000000000ull + (end.tv_nsec - start.tv_nsec);
122#else
123 QueryPerformanceCounter(&end);
124 LARGE_INTEGER freq = {};
125 QueryPerformanceFrequency(&freq);
126
127 boost::uint64_t duration = (end.QuadPart - start.QuadPart) * 1000000000ull / freq.QuadPart;
128#endif
129
130 std::cout << name << " duration: " << duration << " ns" << std::endl;
131}
132
133void run_tests(uuid const& left, uuid const& right)
134{
135 test_performance(left, right, "stock_equal", &stock_equal);
136 test_performance(left, right, "mem_equal", &mem_equal);
137 test_performance(left, right, "simd_equal", &simd_equal);
138
139 test_performance(left, right, "stock_less", &stock_less);
140 test_performance(left, right, "mem_less", &mem_less);
141 test_performance(left, right, "simd_less", &simd_less);
142}
143
144int main(int, char*[])
145{
146 boost::uuids::random_generator gen;
147
148 {
149 std::cout << "Values placed on stack:" << std::endl;
150 uuid left = gen();
151 uuid right = gen();
152 run_tests(left, right);
153 }
154 {
155 std::cout << "\nValues placed on heap:" << std::endl;
156 std::auto_ptr< uuid > pleft(new uuid(gen()));
157 std::auto_ptr< uuid > pright(new uuid(gen()));
158 run_tests(*pleft, *pright);
159 }
160
161 return 0;
162}
163