id,summary,reporter,owner,description,type,status,milestone,component,version,severity,resolution,keywords,cc 2810,Optimization for list::merge performance,Andrey Semashev,Ion Gaztañaga,"The current implementation of the list::merge method is quite inefficient since it always iterates through the *this sequence, even if there are no more elements to insert. It also inserts elements from the other sequence one by one, which may generate a lot of overhead for node pointers adjustments in case if there are equivalent nodes in either of the sequences. I suggest the following optimized version of the merge method (see the my_merge function): {{{ #include #include #include #include #include #include #include #include namespace bi = boost::intrusive; using boost::posix_time::ptime; typedef bi::list_base_hook< bi::link_mode< bi::auto_unlink > > BaseHook_t; struct MyData : public BaseHook_t { struct OrderByKey { typedef bool result_type; result_type operator() (MyData const& left, MyData const& right) const { return left.m_Key < right.m_Key; } }; struct Cloner { typedef MyData* result_type; result_type operator() (MyData const& that) const { return new MyData(that); } }; int m_Key; explicit MyData(int k) : m_Key(k) {} }; typedef bi::list< MyData, bi::base_hook< BaseHook_t >, bi::constant_time_size< false > > List_t; template< typename PredicateT > void my_merge(List_t& this_, List_t& x, PredicateT p) { typedef List_t::const_iterator const_iterator; typedef List_t::size_type size_type; const_iterator b = this_.cbegin(), e = this_.cend(); const_iterator ex = x.cend(); while (!x.empty()) { const_iterator bx = x.cbegin(); while (b != e && p(*bx, *b)) { ++b; } if (b != e) { // determine the end of the range of x to inject at b size_type n = 0; do { ++bx; ++n; } while (bx != ex && p(*bx, *b)); this_.splice(b, x, x.cbegin(), bx, n); } else { // the rest of the list is to be appended to the end this_.splice(e, x); break; } } } int main(int, char*[]) { enum { LIST1_SIZE = 1000000, LIST2_SIZE = LIST1_SIZE }; List_t l11, l12; for (unsigned int i = 0; i < LIST1_SIZE; ++i) { MyData* p = new MyData(std::rand()); l11.push_back(*p); } l11.sort(MyData::OrderByKey()); for (unsigned int i = 0; i < LIST2_SIZE; ++i) { MyData* p = new MyData(std::rand()); l12.push_back(*p); } l12.sort(MyData::OrderByKey()); List_t l21, l22; l21.clone_from(l11, MyData::Cloner(), boost::checked_deleter< MyData >()); l22.clone_from(l12, MyData::Cloner(), boost::checked_deleter< MyData >()); ptime start1, end1, start2, end2; start1 = boost::date_time::microsec_clock< ptime >::universal_time(); l11.merge(l12, MyData::OrderByKey()); end1 = boost::date_time::microsec_clock< ptime >::universal_time(); start2 = boost::date_time::microsec_clock< ptime >::universal_time(); my_merge(l21, l22, MyData::OrderByKey()); end2 = boost::date_time::microsec_clock< ptime >::universal_time(); unsigned int duration1 = end1.time_of_day().total_microseconds() - start1.time_of_day().total_microseconds(); unsigned int duration2 = end2.time_of_day().total_microseconds() - start2.time_of_day().total_microseconds(); std::cout << ""Boost.Intrusive version: "" << duration1 << "" us, optimized: "" << duration2 << "" us"" << std::endl; l11.clear_and_dispose(boost::checked_deleter< MyData >()); l12.clear_and_dispose(boost::checked_deleter< MyData >()); l21.clear_and_dispose(boost::checked_deleter< MyData >()); l22.clear_and_dispose(boost::checked_deleter< MyData >()); return 0; } }}} This test case shows that my_merge is faster than list::merge by an order of magnitude and even more if LIST1_SIZE != LIST2_SIZE. ",Patches,closed,Boost 1.39.0,intrusive,Boost 1.38.0,Optimization,fixed,list merge performance,