Index: boost/mpi/collectives/all_reduce.hpp =================================================================== --- boost/mpi/collectives/all_reduce.hpp (revision 77427) +++ boost/mpi/collectives/all_reduce.hpp (working copy) @@ -12,6 +12,8 @@ #ifndef BOOST_MPI_ALL_REDUCE_HPP #define BOOST_MPI_ALL_REDUCE_HPP +#include + // All-reduce falls back to reduce() + broadcast() in some cases. #include #include @@ -67,7 +69,17 @@ T* out_values, Op op, mpl::false_ /*is_mpi_op*/, mpl::false_ /*is_mpi_datatype*/) { - reduce(comm, in_values, n, out_values, op, 0); + if (in_values == MPI_IN_PLACE) { + // if in_values matches the in place tag, then the output + // buffer actually contains the input data. + // But we can just go back to the out of place + // implementation in this case. + // it's not clear how/if we can avoid the copy. + std::vector tmp_in( out_values, out_values + n); + reduce(comm, &(tmp_in[0]), n, out_values, op, 0); + } else { + reduce(comm, in_values, n, out_values, op, 0); + } broadcast(comm, out_values, n, 0); } } // end namespace detail @@ -83,6 +95,13 @@ template inline void +all_reduce(const communicator& comm, T* values, int n, Op op) +{ + all_reduce(comm, static_cast(MPI_IN_PLACE), n, values, op); +} + +template +inline void all_reduce(const communicator& comm, const T& in_value, T& out_value, Op op) { detail::all_reduce_impl(comm, &in_value, 1, &out_value, op, Index: libs/mpi/test/all_reduce_test.cpp =================================================================== --- libs/mpi/test/all_reduce_test.cpp (revision 77427) +++ libs/mpi/test/all_reduce_test.cpp (working copy) @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +59,16 @@ return point(p1.x + p2.x, p1.y + p2.y, p1.z + p2.z); } +// test lexical order +bool operator<(const point& p1, const point& p2) +{ + return (p1.x < p2.x + ? true + : (p1.x > p2.x + ? 
false + : p1.y < p2.y )); +} + namespace boost { namespace mpi { template <> @@ -67,9 +78,9 @@ template void -all_reduce_test(const communicator& comm, Generator generator, - const char* type_kind, Op op, const char* op_kind, - typename Generator::result_type init) +all_reduce_one_test(const communicator& comm, Generator generator, + const char* type_kind, Op op, const char* op_kind, + typename Generator::result_type init) { typedef typename Generator::result_type value_type; value_type value = generator(comm.rank()); @@ -97,6 +108,67 @@ (comm.barrier)(); } +template +void +all_reduce_array_test(const communicator& comm, Generator generator, + const char* type_kind, Op op, const char* op_kind, + typename Generator::result_type init, bool in_place) +{ + typedef typename Generator::result_type value_type; + value_type value = generator(comm.rank()); + std::vector send(10, value); + + using boost::mpi::all_reduce; + + if (comm.rank() == 0) { + char const* place = in_place ? "in place" : "out of place"; + std::cout << "Reducing (" << place << ") array to " << op_kind << " of " << type_kind << "..."; + std::cout.flush(); + } + std::vector result; + if (in_place) { + all_reduce(comm, &(send[0]), send.size(), op); + result.swap(send); + } else { + std::vector recv(10, value_type()); + all_reduce(comm, &(send[0]), send.size(), &(recv[0]), op); + result.swap(recv); + } + + // Compute expected result + std::vector generated_values; + for (int p = 0; p < comm.size(); ++p) + generated_values.push_back(generator(p)); + value_type expected_result = std::accumulate(generated_values.begin(), + generated_values.end(), + init, op); + + bool got_expected_result = (std::equal_range(result.begin(), result.end(), + expected_result) + == std::make_pair(result.begin(), result.end())); + BOOST_CHECK(got_expected_result); + if (got_expected_result && comm.rank() == 0) + std::cout << "OK." 
<< std::endl; + + (comm.barrier)(); +} + +// Test the 3 families of all reduce: value, array in place, array out of place +template +void +all_reduce_test(const communicator& comm, Generator generator, + const char* type_kind, Op op, const char* op_kind, + typename Generator::result_type init) +{ + const bool in_place = true; + const bool out_of_place = false; + all_reduce_one_test(comm, generator, type_kind, op, op_kind, init); + all_reduce_array_test(comm, generator, type_kind, op, op_kind, + init, in_place); + all_reduce_array_test(comm, generator, type_kind, op, op_kind, + init, out_of_place); +} + // Generates integers to test with all_reduce() struct int_generator { @@ -168,6 +240,11 @@ return x.value == y.value; } +bool operator<(const wrapped_int& x, const wrapped_int& y) +{ + return x.value < y.value; +} + // Generates wrapped_its to test with all_reduce() struct wrapped_int_generator { @@ -196,6 +273,8 @@ environment env(argc, argv); communicator comm; + const bool in_place = true; + const bool out_of_place = false; // Built-in MPI datatypes with built-in MPI operations all_reduce_test(comm, int_generator(), "integers", std::plus(), "sum", @@ -215,8 +294,8 @@ // Built-in MPI datatypes with user-defined operations all_reduce_test(comm, int_generator(17), "integers", secret_int_bit_and(), "bitwise and", -1); - - // Arbitrary types with user-defined, commutative operations. + + // Arbitrary types with user-defined, commutative operations. all_reduce_test(comm, wrapped_int_generator(17), "wrapped integers", std::plus(), "sum", wrapped_int(0));