Ticket #8574: atomic_x86.patch

File atomic_x86.patch, 5.8 KB (added by Andrey Semashev, 9 years ago)

Patch with fixes and optimizations for 32-bit x86

  • boost/atomic/detail/gcc-x86.hpp

     
@@ -28,6 +28,15 @@
 
 #define BOOST_ATOMIC_X86_PAUSE() __asm__ __volatile__ ("pause\n")
 
+#if defined(__i386__) &&\
+    (\
+        defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8) ||\
+        defined(__i586__) || defined(__i686__) || defined(__pentium4__) || defined(__nocona__) || defined(__core2__) || defined(__corei7__) ||\
+        defined(__k6__) || defined(__athlon__) || defined(__k8__) || defined(__amdfam10__) || defined(__bdver1__) || defined(__bdver2__) || defined(__bdver3__) || defined(__btver1__) || defined(__btver2__)\
+    )
+#define BOOST_ATOMIC_X86_HAS_CMPXCHG8B 1
+#endif
+
 inline void
 platform_fence_before(memory_order order)
 {
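This detection hunk treats two signals as equivalent evidence that CMPXCHG8B is usable: the compiler-advertised __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 macro, or a target-CPU macro for the Pentium (i586) or anything newer, since every such CPU implements the instruction. As a minimal standalone sketch (not part of the patch), the same capability can be probed through the GCC __sync builtin, which on 32-bit x86 typically compiles to lock cmpxchg8b:

    #include <cstdint>
    #include <cstdio>

    int main()
    {
    #if defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8)
        std::uint64_t v = 1;
        // On 32-bit x86 GCC emits this builtin as lock cmpxchg8b.
        std::uint64_t old = __sync_val_compare_and_swap(&v, (std::uint64_t)1, (std::uint64_t)2);
        std::printf("64-bit CAS available: old=%llu new=%llu\n",
                    (unsigned long long)old, (unsigned long long)v);
    #else
        std::printf("compiler does not advertise a 64-bit CAS; the CPU macros decide\n");
    #endif
        return 0;
    }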
     
@@ -198,10 +207,10 @@
 #define BOOST_ATOMIC_INT_LOCK_FREE 2
 #define BOOST_ATOMIC_LONG_LOCK_FREE 2
 
-#if defined(__x86_64__)
+#if defined(__x86_64__) || defined(BOOST_ATOMIC_X86_HAS_CMPXCHG8B)
 #define BOOST_ATOMIC_LLONG_LOCK_FREE 2
 #else
-#define BOOST_ATOMIC_LLONG_LOCK_FREE 1
+#define BOOST_ATOMIC_LLONG_LOCK_FREE 0
 #endif
 
 #define BOOST_ATOMIC_POINTER_LOCK_FREE 2
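The BOOST_ATOMIC_*_LOCK_FREE macros follow the C++11 ATOMIC_*_LOCK_FREE convention: 2 means always lock-free, 1 sometimes, 0 never. Note that this hunk also corrects the fallback value from 1 to 0, since the lock-based implementation is never lock-free. A hedged sketch of how the value surfaces to user code (assuming Boost.Atomic is installed):

    #include <boost/atomic.hpp>
    #include <iostream>

    int main()
    {
        boost::atomic<long long> x(0);
        // Prints 1 on i586+ 32-bit targets once BOOST_ATOMIC_LLONG_LOCK_FREE
        // is 2; prints 0 where the lock-based fallback is used instead.
        std::cout << x.is_lock_free() << std::endl;
        return 0;
    }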
     
@@ -1618,11 +1627,11 @@
 };
 #endif
 
-#if !defined(__x86_64__) && (defined(__i686__) || defined (__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8))
+#if !defined(__x86_64__) && defined(BOOST_ATOMIC_X86_HAS_CMPXCHG8B)
 
 template<typename T>
 inline bool
-platform_cmpxchg64_strong(T & expected, T desired, volatile T * ptr)
+platform_cmpxchg64_strong(T & expected, T desired, volatile T * ptr) BOOST_NOEXCEPT
 {
 #ifdef __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8
     const T oldval = __sync_val_compare_and_swap(ptr, expected, desired);
     
@@ -1630,7 +1639,7 @@
     expected = oldval;
     return result;
 #else
-    int scratch;
+    uint32_t scratch;
     T prev = expected;
     /* Make sure ebx is saved and restored properly in case
     this object is compiled as "position independent". Since
     
@@ -1652,7 +1661,7 @@
         "lock; cmpxchg8b 0(%4)\n"
         "movl %1, %%ebx\n"
         : "=A" (prev), "=m" (scratch)
-        : "D" ((int)desired), "c" ((int)(desired >> 32)), "S" (ptr), "0" (prev)
+        : "D" ((uint32_t)desired), "c" ((uint32_t)(desired >> 32)), "S" (ptr), "0" (prev)
         : "memory");
     bool success = (prev == expected);
     expected = prev;
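Replacing the int casts with uint32_t is more than cosmetic: converting a 64-bit value that does not fit into a signed 32-bit int is implementation-defined (until C++20 pinned down two's complement wrapping), whereas conversion to an unsigned type is always well-defined modulo 2^32. A small illustrative sketch (not part of the patch):

    #include <cstdint>

    // Splitting a 64-bit operand into the ECX:EBX (high:low) halves that
    // cmpxchg8b expects for the new value.
    std::uint32_t low_half(std::uint64_t v)  { return (std::uint32_t)v; }         // well-defined truncation
    std::uint32_t high_half(std::uint64_t v) { return (std::uint32_t)(v >> 32); } // well-defined truncation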
     
@@ -1660,27 +1669,85 @@
 #endif
 }
 
+// Intel 64 and IA-32 Architectures Software Developer's Manual, Volume 3A, 8.1.1. Guaranteed Atomic Operations:
+//
+// The Pentium processor (and newer processors since) guarantees that the following additional memory operations will always be carried out atomically:
+// * Reading or writing a quadword aligned on a 64-bit boundary
+//
+// Luckily, the memory is almost always 8-byte aligned in our case because atomic<> uses 64 bit native types for storage and dynamic memory allocations
+// have at least 8 byte alignment. The only unfortunate case is when the atomic is placed on the stack and it is not 8-byte aligned (like on 32 bit Windows).
+
 template<typename T>
 inline void
-platform_store64(T value, volatile T * ptr)
+platform_store64(T value, volatile T * ptr) BOOST_NOEXCEPT
 {
-    T expected = *ptr;
-    for (; !platform_cmpxchg64_strong(expected, value, ptr);)
+    if (((uint32_t)ptr & 0x00000007) == 0)
     {
-        BOOST_ATOMIC_X86_PAUSE();
+#if defined(__SSE2__)
+        __asm__ __volatile__
+        (
+            "movq %1, %%xmm0\n\t"
+            "movq %%xmm0, %0\n\t"
+            : "=m" (*ptr)
+            : "m" (value)
+            : "memory", "xmm0"
+        );
+#else
+        __asm__ __volatile__
+        (
+            "fildll %1\n\t"
+            "fistpll %0\n\t"
+            : "=m" (*ptr)
+            : "m" (value)
+            : "memory"
+        );
+#endif
     }
+    else
+    {
+        T expected = *ptr;
+        while (!platform_cmpxchg64_strong(expected, value, ptr))
+        {
+            BOOST_ATOMIC_X86_PAUSE();
+        }
+    }
 }
 
 template<typename T>
 inline T
 platform_load64(const volatile T * ptr) BOOST_NOEXCEPT
 {
-    T expected = *ptr;
-    for (; !platform_cmpxchg64_strong(expected, expected, const_cast<volatile T*>(ptr));)
+    T value = T();
+
+    if (((uint32_t)ptr & 0x00000007) == 0)
     {
-        BOOST_ATOMIC_X86_PAUSE();
+#if defined(__SSE2__)
+        __asm__ __volatile__
+        (
+            "movq %1, %%xmm0\n\t"
+            "movq %%xmm0, %0\n\t"
+            : "=m" (value)
+            : "m" (*ptr)
+            : "memory", "xmm0"
+        );
+#else
+        __asm__ __volatile__
+        (
+            "fildll %1\n\t"
+            "fistpll %0\n\t"
+            : "=m" (value)
+            : "m" (*ptr)
+            : "memory"
+        );
+#endif
     }
-    return expected;
+    else
+    {
+        // We don't care about the comparison result here; the previous value will be stored into value anyway.
+        platform_cmpxchg64_strong(value, value, const_cast<volatile T*>(ptr));
+    }
+
+    return value;
 }
 
 #endif
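The rewritten platform_store64/platform_load64 take the single-instruction movq path (or fildll/fistpll when SSE2 is unavailable) only when the pointer is 8-byte aligned, and fall back to the cmpxchg8b loop otherwise. As the new comment notes, the misaligned case mostly arises for stack objects. A hedged sketch of how a caller could rule that case out on GCC (illustrative, not part of the patch):

    #include <cstdint>

    void example()
    {
        // A 32-bit ABI may align a plain uint64_t on the stack to only
        // 4 bytes, which would force the cmpxchg8b fallback; the GCC
        // attribute guarantees the fast aligned path above is taken.
        std::uint64_t storage __attribute__((aligned(8))) = 0;
        (void)storage;
    }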
     
@@ -1690,7 +1757,7 @@
 }
 
 /* pull in 64-bit atomic type using cmpxchg8b above */
-#if !defined(__x86_64__) && (defined(__i686__) || defined (__GCC_HAVE_SYNC_COMPARE_AND_SWAP_8))
+#if !defined(__x86_64__) && defined(BOOST_ATOMIC_X86_HAS_CMPXCHG8B)
 #include <boost/atomic/detail/cas64strong.hpp>
 #endif
 
  • libs/atomic/test/lockfree.cpp

     
@@ -43,7 +43,11 @@
 #define EXPECT_SHORT_LOCK_FREE 2
 #define EXPECT_INT_LOCK_FREE 2
 #define EXPECT_LONG_LOCK_FREE 2
-#define EXPECT_LLONG_LOCK_FREE 1
+#if defined(BOOST_ATOMIC_X86_HAS_CMPXCHG8B)
+#define EXPECT_LLONG_LOCK_FREE 2
+#else
+#define EXPECT_LLONG_LOCK_FREE 0
+#endif
 #define EXPECT_POINTER_LOCK_FREE 2
 #define EXPECT_BOOL_LOCK_FREE 2
 
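With the header and the test keyed to the same macro, both branches can be exercised by varying the target baseline: for example, -march=i486 should leave BOOST_ATOMIC_X86_HAS_CMPXCHG8B undefined (expecting 0), while -march=i586 or newer defines it (expecting 2). A hedged sketch of the check the test effectively performs, assuming (as the patched test itself does) that the detail macro is visible after including the public header:

    #include <boost/atomic.hpp>

    int main()
    {
    #if defined(BOOST_ATOMIC_X86_HAS_CMPXCHG8B)
        // Requires C++11; mirrors the EXPECT_LLONG_LOCK_FREE == 2 branch above.
        static_assert(BOOST_ATOMIC_LLONG_LOCK_FREE == 2, "expected lock-free long long");
    #endif
        return 0;
    }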