Ticket #8509: uuid_simd.patch

File uuid_simd.patch, 15.4 KB (added by Andrey Semashev, 9 years ago)

Updated patch that adds optimizations for SSE and C++11 to boost::uuid.

  • boost/uuid/uuid.hpp

     
    2828//  28 Nov 2009 - disabled deprecated warnings for MSVC
    2929//  30 Nov 2009 - used BOOST_STATIC_CONSTANT
    3030//  02 Dec 2009 - removed BOOST_STATIC_CONSTANT - not all compilers like it
     31//  29 Apr 2013 - added support for noexcept and constexpr, added optimizations for SSE/AVX
    3132
    3233#ifndef BOOST_UUID_HPP
    3334#define BOOST_UUID_HPP
    3435
    35 #include <boost/config.hpp>
    36 #include <stddef.h>
     36#include <cstddef>
    3737#include <boost/cstdint.hpp>
    38 #include <algorithm>
    39 #include <boost/config.hpp> // for static assert
     38#include <boost/uuid/detail/config.hpp>
    4039#ifndef BOOST_UUID_NO_TYPE_TRAITS
    4140#include <boost/type_traits/is_pod.hpp>
    4241#include <boost/type_traits/integral_constant.hpp>
    4342#endif
    4443
     44#ifdef BOOST_HAS_PRAGMA_ONCE
     45#pragma once
     46#endif
     47
    4548#if defined(_MSC_VER)
    4649#pragma warning(push) // Save warning settings.
    4750#pragma warning(disable : 4996) // Disable deprecated std::swap_ranges, std::equal
     
    6972    typedef std::ptrdiff_t difference_type;
    7073
    7174    // This does not work on some compilers
    72     // They seem to want the variable definec in 
     75    // They seem to want the variable defined in
    7376    // a cpp file
    7477    //BOOST_STATIC_CONSTANT(size_type, static_size = 16);
    75     static size_type static_size() { return 16; }
     78    static BOOST_CONSTEXPR size_type static_size() BOOST_NOEXCEPT { return 16; }
    7679
    7780public:
    78     iterator begin() { return data; } /* throw() */
    79     const_iterator begin() const { return data; } /* throw() */
    80     iterator end() { return data+size(); } /* throw() */
    81     const_iterator end() const { return data+size(); } /* throw() */
     81    iterator begin() BOOST_NOEXCEPT { return data; }
     82    const_iterator begin() const BOOST_NOEXCEPT { return data; }
     83    iterator end() BOOST_NOEXCEPT { return data+size(); }
     84    const_iterator end() const BOOST_NOEXCEPT { return data+size(); }
    8285
    83     size_type size() const { return static_size(); } /* throw() */
     86    BOOST_CONSTEXPR size_type size() const BOOST_NOEXCEPT { return static_size(); }
    8487
    85     bool is_nil() const /* throw() */
    86     {
    87         for(size_t i=0; i<static_size(); i++) {
    88             if (data[i] != 0U) {
    89                 return false;
    90             }
    91         }
    92         return true;
    93     }
     88    bool is_nil() const BOOST_NOEXCEPT;
    9489
    9590    enum variant_type
    9691    {
     
    9994        variant_microsoft, // Microsoft Corporation backward compatibility
    10095        variant_future // future definition
    10196    };
    102     variant_type variant() const /* throw() */
     97    variant_type variant() const BOOST_NOEXCEPT
    10398    {
    10499        // variant is stored in octet 7
    105100        // which is index 8, since indexes count backwards
     
    115110            return variant_future;
    116111        }
    117112    }
    118    
    119     enum version_type 
     113
     114    enum version_type
    120115    {
    121116        version_unknown = -1,
    122117        version_time_based = 1,
     
    125120        version_random_number_based = 4,
    126121        version_name_based_sha1 = 5
    127122    };
    128     version_type version() const /* throw() */
     123    version_type version() const BOOST_NOEXCEPT
    129124    {
    130         //version is stored in octet 9
     125        // version is stored in octet 9
    131126        // which is index 6, since indexes count backwards
    132         unsigned char octet9 = data[6];
     127        uint8_t octet9 = data[6];
    133128        if ( (octet9 & 0xF0) == 0x10 ) {
    134129            return version_time_based;
    135130        } else if ( (octet9 & 0xF0) == 0x20 ) {
     
    146141    }
    147142
    148143    // note: linear complexity
    149     void swap(uuid& rhs) /* throw() */
    150     {
    151         std::swap_ranges(begin(), end(), rhs.begin());
    152     }
     144    void swap(uuid& rhs) BOOST_NOEXCEPT;
    153145
    154146public:
    155147    // or should it be array<uint8_t, 16>
    156148    uint8_t data[16];
    157149};
    158150
    159 inline bool operator==(uuid const& lhs, uuid const& rhs) /* throw() */
    160 {
    161     return std::equal(lhs.begin(), lhs.end(), rhs.begin());
    162 }
     151bool operator== (uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT;
     152bool operator< (uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT;
    163153
    164 inline bool operator!=(uuid const& lhs, uuid const& rhs) /* throw() */
     154inline bool operator!=(uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT
    165155{
    166156    return !(lhs == rhs);
    167157}
    168158
    169 inline bool operator<(uuid const& lhs, uuid const& rhs) /* throw() */
     159inline bool operator>(uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT
    170160{
    171     return std::lexicographical_compare(lhs.begin(), lhs.end(), rhs.begin(), rhs.end());
    172 }
    173 
    174 inline bool operator>(uuid const& lhs, uuid const& rhs) /* throw() */
    175 {
    176161    return rhs < lhs;
    177162}
    178 inline bool operator<=(uuid const& lhs, uuid const& rhs) /* throw() */
     163inline bool operator<=(uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT
    179164{
    180165    return !(rhs < lhs);
    181166}
    182167
    183 inline bool operator>=(uuid const& lhs, uuid const& rhs) /* throw() */
     168inline bool operator>=(uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT
    184169{
    185170    return !(lhs < rhs);
    186171}
    187172
    188 inline void swap(uuid& lhs, uuid& rhs) /* throw() */
     173inline void swap(uuid& lhs, uuid& rhs) BOOST_NOEXCEPT
    189174{
    190175    lhs.swap(rhs);
    191176}
    192177
    193178// This is equivalent to boost::hash_range(u.begin(), u.end());
    194 inline std::size_t hash_value(uuid const& u) /* throw() */
     179inline std::size_t hash_value(uuid const& u) BOOST_NOEXCEPT
    195180{
    196181    std::size_t seed = 0;
    197     for(uuid::const_iterator i=u.begin(); i != u.end(); ++i)
     182    for(uuid::const_iterator i=u.begin(), e=u.end(); i != e; ++i)
    198183    {
    199184        seed ^= static_cast<std::size_t>(*i) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    200185    }
     
    214199} // namespace boost
    215200#endif
    216201
     202#if defined(BOOST_UUID_USE_SSE2)
     203#include <boost/uuid/detail/uuid_x86.hpp>
     204#else
     205#include <boost/uuid/detail/uuid_generic.hpp>
     206#endif
     207
    217208#if defined(_MSC_VER)
    218209#pragma warning(pop) // Restore warnings to previous state.
    219210#endif
  • boost/uuid/detail/config.hpp

     
     1/*
     2 *            Copyright Andrey Semashev 2013.
     3 * Distributed under the Boost Software License, Version 1.0.
     4 *    (See accompanying file LICENSE_1_0.txt or copy at
     5 *          http://www.boost.org/LICENSE_1_0.txt)
     6 */
     7/*!
     8 * \file   uuid/detail/config.hpp
     9 *
     10 * \brief  This header defines configuration macros for Boost.UUID.
     11 */
     12
     13#ifndef BOOST_UUID_DETAIL_CONFIG_HPP_INCLUDED_
     14#define BOOST_UUID_DETAIL_CONFIG_HPP_INCLUDED_
     15
     16#include <boost/config.hpp>
     17
     18#ifdef BOOST_HAS_PRAGMA_ONCE
     19#pragma once
     20#endif
     21
     22#if !defined(BOOST_UUID_NO_SIMD)
     23
     24#if defined(__GNUC__) && defined(__SSE2__)
     25
     26// GCC and its pretenders go here
     27#ifndef BOOST_UUID_USE_SSE2
     28#define BOOST_UUID_USE_SSE2
     29#endif
     30
     31#if defined(__SSE3__) && !defined(BOOST_UUID_USE_SSE3)
     32#define BOOST_UUID_USE_SSE3
     33#endif
     34
     35#if defined(__SSE4_1__) && !defined(BOOST_UUID_USE_SSE41)
     36#define BOOST_UUID_USE_SSE41
     37#endif
     38
     39#elif defined(_MSC_VER) && (defined(_M_X64) || (defined(_M_IX86) && defined(_M_IX86_FP) && _M_IX86_FP >= 2))
     40
     41#ifndef BOOST_UUID_USE_SSE2
     42#define BOOST_UUID_USE_SSE2
     43#endif
     44
     45#elif !defined(BOOST_UUID_USE_SSE41) && !defined(BOOST_UUID_USE_SSE3) && !defined(BOOST_UUID_USE_SSE2)
     46
     47#define BOOST_UUID_NO_SIMD
     48
     49#endif
     50
     51// More advanced ISA extensions imply less advanced are also available
     52#if !defined(BOOST_UUID_USE_SSE3) && defined(BOOST_UUID_USE_SSE41)
     53#define BOOST_UUID_USE_SSE3
     54#endif
     55
     56#if !defined(BOOST_UUID_USE_SSE2) && defined(BOOST_UUID_USE_SSE3)
     57#define BOOST_UUID_USE_SSE2
     58#endif
     59
     60#endif // !defined(BOOST_UUID_NO_SIMD)
     61
     62#endif // BOOST_UUID_DETAIL_CONFIG_HPP_INCLUDED_
  • boost/uuid/detail/uuid_x86.hpp

    Property changes on: boost/uuid/detail/config.hpp
    ___________________________________________________________________
    Added: svn:mime-type
    ## -0,0 +1 ##
    +text/plain
    \ No newline at end of property
    Added: svn:keywords
    ## -0,0 +1 ##
    +Id
    \ No newline at end of property
    Added: svn:eol-style
    ## -0,0 +1 ##
    +native
    \ No newline at end of property
     
     1/*
     2 *            Copyright Andrey Semashev 2013.
     3 * Distributed under the Boost Software License, Version 1.0.
     4 *    (See accompanying file LICENSE_1_0.txt or copy at
     5 *          http://www.boost.org/LICENSE_1_0.txt)
     6 */
     7/*!
     8 * \file   uuid/detail/uuid_x86.hpp
     9 *
     10 * \brief  This header contains optimized SSE implementation of \c boost::uuid operations.
     11 */
     12
     13#ifndef BOOST_UUID_DETAIL_UUID_X86_HPP_INCLUDED_
     14#define BOOST_UUID_DETAIL_UUID_X86_HPP_INCLUDED_
     15
     16// MSVC does not always have immintrin.h (at least, not up to MSVC 10), so include the appropriate header for each instruction set
     17#if defined(BOOST_UUID_USE_SSE41)
     18#include <smmintrin.h>
     19#elif defined(BOOST_UUID_USE_SSE3)
     20#include <pmmintrin.h>
     21#else
     22#include <emmintrin.h>
     23#endif
     24
     25namespace boost {
     26namespace uuids {
     27namespace detail {
     28
     29BOOST_FORCEINLINE __m128i load_unaligned_si128(const uint8_t* p) BOOST_NOEXCEPT
     30{
     31#if defined(BOOST_UUID_USE_SSE3)
     32    return _mm_lddqu_si128(reinterpret_cast< const __m128i* >(p));
     33#else
     34    return _mm_loadu_si128(reinterpret_cast< const __m128i* >(p));
     35#endif
     36}
     37
     38} // namespace detail
     39
     40inline bool uuid::is_nil() const BOOST_NOEXCEPT
     41{
     42    register __m128i mm = uuids::detail::load_unaligned_si128(data);
     43#if defined(BOOST_UUID_USE_SSE41)
     44    return _mm_test_all_zeros(mm, mm) != 0;
     45#else
     46    mm = _mm_cmpeq_epi8(mm, _mm_setzero_si128());
     47    return _mm_movemask_epi8(mm) == 0xFFFF;
     48#endif
     49}
     50
     51inline void uuid::swap(uuid& rhs) BOOST_NOEXCEPT
     52{
     53    register __m128i mm_this = uuids::detail::load_unaligned_si128(data);
     54    register __m128i mm_rhs = uuids::detail::load_unaligned_si128(rhs.data);
     55    _mm_storeu_si128(reinterpret_cast< __m128i* >(rhs.data), mm_this);
     56    _mm_storeu_si128(reinterpret_cast< __m128i* >(data), mm_rhs);
     57}
     58
     59inline bool operator== (uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT
     60{
     61    register __m128i mm_left = uuids::detail::load_unaligned_si128(lhs.data);
     62    register __m128i mm_right = uuids::detail::load_unaligned_si128(rhs.data);
     63
     64    register __m128i mm_cmp = _mm_cmpeq_epi32(mm_left, mm_right);
     65#if defined(BOOST_UUID_USE_SSE41)
     66    return _mm_test_all_ones(mm_cmp);
     67#else
     68    return _mm_movemask_epi8(mm_cmp) == 0xFFFF;
     69#endif
     70}
     71
     72inline bool operator< (uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT
     73{
     74    register __m128i mm_left = uuids::detail::load_unaligned_si128(lhs.data);
     75    register __m128i mm_right = uuids::detail::load_unaligned_si128(rhs.data);
     76
     77    // To emulate lexicographical_compare behavior we have to perform two comparisons - the forward and reverse one.
     78    // Then we know which bytes are equivalent and which ones are different, and for those different the comparison results
     79    // will be opposite. Then we'll be able to find the first differing comparison result (for both forward and reverse ways),
     80    // and depending on which way it is for, this will be the result of the operation. There are a few notes to consider:
     81    //
     82    // 1. Due to little endian byte order the first bytes go into the lower part of the xmm registers,
     83    //    so the comparison results in the least significant bits will actually be the most significant for the final operation result.
     84    //    This means we have to determine which of the comparison results have the least significant bit on, and this is achieved with
     85    //    the "(x - 1) ^ x" trick.
     86    // 2. Because there is only signed comparison in SSE/AVX, we have to invert byte comparison results whenever signs of the corresponding
     87    //    bytes are different. I.e. in signed comparison it's -1 < 1, but in unsigned it is the opposite (255 > 1). To do that we XOR left and right,
     88    //    making the most significant bit of each byte 1 if the signs are different, and later apply this mask with another XOR to the comparison results.
     89    // 3. pcmpgtb compares for "greater" relation, so we swap the arguments to get what we need.
     90
     91    const __m128i mm_signs_mask = _mm_xor_si128(mm_left, mm_right);
     92
     93    __m128i mm_cmp = _mm_cmpgt_epi8(mm_right, mm_left), mm_rcmp = _mm_cmpgt_epi8(mm_left, mm_right);
     94
     95    mm_cmp = _mm_xor_si128(mm_signs_mask, mm_cmp);
     96    mm_rcmp = _mm_xor_si128(mm_signs_mask, mm_rcmp);
     97
     98    uint32_t cmp = static_cast< uint32_t >(_mm_movemask_epi8(mm_cmp)), rcmp = static_cast< uint32_t >(_mm_movemask_epi8(mm_rcmp));
     99
     100    cmp = (cmp - 1u) ^ cmp;
     101    rcmp = (rcmp - 1u) ^ rcmp;
     102
     103    return static_cast< uint16_t >(cmp) < static_cast< uint16_t >(rcmp);
     104}
     105
     106} // namespace uuids
     107} // namespace boost
     108
     109#endif // BOOST_UUID_DETAIL_UUID_X86_HPP_INCLUDED_
  • boost/uuid/detail/uuid_generic.hpp

    Property changes on: boost/uuid/detail/uuid_x86.hpp
    ___________________________________________________________________
    Added: svn:eol-style
    ## -0,0 +1 ##
    +native
    \ No newline at end of property
    Added: svn:mime-type
    ## -0,0 +1 ##
    +text/plain
    \ No newline at end of property
    Added: svn:keywords
    ## -0,0 +1 ##
    +Id
    \ No newline at end of property
     
     1/*
     2 *             Copyright Andy Tompkins 2006.
     3 * Distributed under the Boost Software License, Version 1.0.
     4 *    (See accompanying file LICENSE_1_0.txt or copy at
     5 *          http://www.boost.org/LICENSE_1_0.txt)
     6 */
     7/*!
     8 * \file   uuid/detail/uuid_generic.hpp
     9 *
     10 * \brief  This header contains generic implementation of \c boost::uuid operations.
     11 */
     12
     13#ifndef BOOST_UUID_DETAIL_UUID_GENERIC_HPP_INCLUDED_
     14#define BOOST_UUID_DETAIL_UUID_GENERIC_HPP_INCLUDED_
     15
     16#include <string.h>
     17
     18namespace boost {
     19namespace uuids {
     20
     21inline bool uuid::is_nil() const BOOST_NOEXCEPT
     22{
     23    for (std::size_t i = 0; i < sizeof(data); ++i)
     24    {
     25        if (data[i] != 0U)
     26            return false;
     27    }
     28    return true;
     29}
     30
     31inline void uuid::swap(uuid& rhs) BOOST_NOEXCEPT
     32{
     33    uuid tmp = *this;
     34    *this = rhs;
     35    rhs = tmp;
     36}
     37
     38inline bool operator== (uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT
     39{
     40    return memcmp(lhs.data, rhs.data, sizeof(lhs.data)) == 0;
     41}
     42
     43inline bool operator< (uuid const& lhs, uuid const& rhs) BOOST_NOEXCEPT
     44{
     45    return memcmp(lhs.data, rhs.data, sizeof(lhs.data)) < 0;
     46}
     47
     48} // namespace uuids
     49} // namespace boost
     50
     51#endif // BOOST_UUID_DETAIL_UUID_GENERIC_HPP_INCLUDED_
  • libs/uuid/test/test_uuid.cpp

    Property changes on: boost/uuid/detail/uuid_generic.hpp
    ___________________________________________________________________
    Added: svn:mime-type
    ## -0,0 +1 ##
    +text/plain
    \ No newline at end of property
    Added: svn:keywords
    ## -0,0 +1 ##
    +Id
    \ No newline at end of property
    Added: svn:eol-style
    ## -0,0 +1 ##
    +native
    \ No newline at end of property
     
    156156        uuid u1 = {{0}};
    157157        uuid u2 = {{1,0}};
    158158        uuid u3 = {{255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}};
     159        uuid u4 = {{0,1,0}};
     160        uuid u5 = {{0,255,0}};
    159161
    160162        BOOST_TEST_EQ(u1, u1);
    161163
     
    163165   
    164166        BOOST_TEST(u1 < u2);
    165167        BOOST_TEST(u2 < u3);
     168        BOOST_TEST(u1 < u4);
     169        BOOST_TEST(u1 < u5);
     170        BOOST_TEST(u4 < u5);
     171        BOOST_TEST(u4 < u2);
     172        BOOST_TEST(u5 < u2);
    166173
    167174        BOOST_TEST(u1 <= u1);
    168175        BOOST_TEST(u1 <= u2);