From de83cdcec94572ccd6ade44dddacc494e3eed563 Mon Sep 17 00:00:00 2001 From: Pascal Germroth Date: Fri, 19 Jul 2013 02:03:55 +0200 Subject: [PATCH] Reorganize OpenMP algebra and state. #5 #6 #7 openmp_range_algebra: parallel for over a random access container. openmp_nested_algebra: process parts of a split container in parallel. openmp_state: a split container based on vector<vector<T>>. openmp_algebra: use a range_algebra on each part of that container. --- .../numeric/odeint/external/openmp/openmp.hpp | 10 +- .../odeint/external/openmp/openmp_algebra.hpp | 101 ------------- .../openmp/openmp_algebra_dispatcher.hpp | 40 ------ .../external/openmp/openmp_nested_algebra.hpp | 71 ++++++++++ .../external/openmp/openmp_range_algebra.hpp | 65 +++++++++ .../odeint/external/openmp/openmp_resize.hpp | 87 ------------ .../odeint/external/openmp/openmp_state.hpp | 134 ++++++++++++++---- 7 files changed, 252 insertions(+), 256 deletions(-) delete mode 100644 boost/numeric/odeint/external/openmp/openmp_algebra.hpp delete mode 100644 boost/numeric/odeint/external/openmp/openmp_algebra_dispatcher.hpp create mode 100644 boost/numeric/odeint/external/openmp/openmp_nested_algebra.hpp create mode 100644 boost/numeric/odeint/external/openmp/openmp_range_algebra.hpp delete mode 100644 boost/numeric/odeint/external/openmp/openmp_resize.hpp diff --git a/boost/numeric/odeint/external/openmp/openmp.hpp b/boost/numeric/odeint/external/openmp/openmp.hpp index c43b3cd9..66cd5897 100644 --- a/boost/numeric/odeint/external/openmp/openmp.hpp +++ b/boost/numeric/odeint/external/openmp/openmp.hpp @@ -18,11 +18,13 @@ #ifndef BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_HPP_INCLUDED #define BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_HPP_INCLUDED +// level 1: parallel iteration over random access container +#include "openmp_range_algebra.hpp" + +// level 2: split range state #include "openmp_state.hpp" -#include "openmp_algebra.hpp" -#include "openmp_algebra_dispatcher.hpp" -#include "openmp_resize.hpp" 
-#include "openmp_system.hpp" +// level 3: process a random access container of sub-states in parallel +#include "openmp_nested_algebra.hpp" #endif diff --git a/boost/numeric/odeint/external/openmp/openmp_algebra.hpp b/boost/numeric/odeint/external/openmp/openmp_algebra.hpp deleted file mode 100644 index 684a86ba..00000000 --- a/boost/numeric/odeint/external/openmp/openmp_algebra.hpp +++ /dev/null @@ -1,101 +0,0 @@ -/* - [auto_generated] - boost/numeric/odeint/external/openmp/openmp_algebra.hpp - - [begin_description] - Nested parallelized algebra for OpenMP. - [end_description] - - Copyright 2009-2011 Karsten Ahnert - Copyright 2009-2011 Mario Mulansky - - Distributed under the Boost Software License, Version 1.0. - (See accompanying file LICENSE_1_0.txt or - copy at http://www.boost.org/LICENSE_1_0.txt) - */ - - -#ifndef BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_ALGEBRA_HPP_INCLUDED -#define BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_ALGEBRA_HPP_INCLUDED - -#include -#include -#include "openmp_state.hpp" - -namespace boost { -namespace numeric { -namespace odeint { - -/** \brief Basic OpenMP-parallelized algebra. - * - * Requires `s.size()` and `s[n]`, i.e. a Random Access Container. - */ -struct basic_openmp_algebra -{ - -// FIXME: _Pragma is C++11. 
-#define BOOST_ODEINT_GEN_BODY(n) \ - const size_t len = s0.size(); \ - _Pragma("omp parallel for schedule(dynamic)") \ - for( size_t i = 0 ; i < len ; i++ ) \ - op( BOOST_PP_ENUM_BINARY_PARAMS(n, s, [i] BOOST_PP_INTERCEPT) ); -BOOST_ODEINT_GEN_FOR_EACH(BOOST_ODEINT_GEN_BODY) -#undef BOOST_ODEINT_GEN_BODY - - - template< class S > - static typename norm_result_type< S >::type norm_inf( const S &s ) - { - using std::max; - using std::abs; - typedef typename norm_result_type< S >::type result_type; - result_type init = static_cast< result_type >( 0 ); -# pragma omp parallel for reduction(max: init) - for( size_t i = 0 ; i < s.size() ; ++i ) - init = max( init , abs(s[i]) ); - return init; - } - -}; - - -/** \brief OpenMP-parallelized algebra, wrapping another, non-parallelized algebra. - */ -template< class InnerAlgebra > -struct openmp_algebra -{ - -// FIXME: _Pragma is C++11. -#define BOOST_ODEINT_GEN_BODY(n) \ - const size_t len = s0.size(); \ - _Pragma("omp parallel for schedule(static,1)") \ - for( size_t i = 0 ; i < len ; i++ ) \ - InnerAlgebra::for_each##n( \ - BOOST_PP_ENUM_BINARY_PARAMS(n, s, [i] BOOST_PP_INTERCEPT) , \ - op \ - ); -BOOST_ODEINT_GEN_FOR_EACH(BOOST_ODEINT_GEN_BODY) -#undef BOOST_ODEINT_GEN_BODY - - - template< class InnerState > - static typename norm_result_type< InnerState >::type norm_inf( const openmp_state< InnerState > &s ) - { - using std::max; - using std::abs; - typedef typename norm_result_type< InnerState >::type result_type; - result_type init = static_cast< result_type >( 0 ); -# pragma omp parallel for reduction(max: init) schedule(static,1) - for( size_t i = 0 ; i < s.size() ; i++ ) - init = max( init , InnerAlgebra::norm_inf( s[i] ) ); - return init; - } - -}; - - -} -} -} - -#endif diff --git a/boost/numeric/odeint/external/openmp/openmp_algebra_dispatcher.hpp b/boost/numeric/odeint/external/openmp/openmp_algebra_dispatcher.hpp deleted file mode 100644 index 2a94da94..00000000 --- 
a/boost/numeric/odeint/external/openmp/openmp_algebra_dispatcher.hpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - [auto_generated] - boost/numeric/odeint/external/openmp/openmp_algebra_dispatcher.hpp - - [begin_description] - Algebra dispatcher to automatically chose suitable algebra. - [end_description] - - Copyright 2009-2013 Karsten Ahnert - Copyright 2009-2013 Mario Mulansky - - Distributed under the Boost Software License, Version 1.0. - (See accompanying file LICENSE_1_0.txt or - copy at http://www.boost.org/LICENSE_1_0.txt) - */ - -#ifndef BOOST_NUMERIC_ODEINT_OPENMP_OPENMP_ALGEBRA_DISPATCHER_HPP_INCLUDED -#define BOOST_NUMERIC_ODEINT_OPENMP_OPENMP_ALGEBRA_DISPATCHER_HPP_INCLUDED - -#include -#include -#include "openmp_state.hpp" - -namespace boost { -namespace numeric { -namespace odeint { - -template< class Inner > -struct algebra_dispatcher< openmp_state< Inner > > -{ - typedef openmp_algebra< - typename algebra_dispatcher< Inner >::algebra_type - > algebra_type; -}; - -} -} -} - -#endif diff --git a/boost/numeric/odeint/external/openmp/openmp_nested_algebra.hpp b/boost/numeric/odeint/external/openmp/openmp_nested_algebra.hpp new file mode 100644 index 00000000..57de3ef7 --- /dev/null +++ b/boost/numeric/odeint/external/openmp/openmp_nested_algebra.hpp @@ -0,0 +1,71 @@ +/* + [auto_generated] + boost/numeric/odeint/external/openmp/openmp_nested_algebra.hpp + + [begin_description] + Nested parallelized algebra for OpenMP. + [end_description] + + Copyright 2009-2011 Karsten Ahnert + Copyright 2009-2011 Mario Mulansky + + Distributed under the Boost Software License, Version 1.0. 
+ (See accompanying file LICENSE_1_0.txt or + copy at http://www.boost.org/LICENSE_1_0.txt) + */ + + +#ifndef BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_NESTED_ALGEBRA_HPP_INCLUDED +#define BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_NESTED_ALGEBRA_HPP_INCLUDED + +#include +#include + +namespace boost { +namespace numeric { +namespace odeint { + +/** \brief OpenMP-parallelized algebra, wrapping another, non-parallelized algebra. + * The NestedState must be a Random Access Container where the elements are sub-states + * which will be processed in parallel. + * + * Requires a NestedState with `s::value_type`, `s[n]` and `s.size()`. + */ +template< class InnerAlgebra > +struct openmp_nested_algebra +{ + +// FIXME: _Pragma is C++11. +#define BOOST_ODEINT_GEN_BODY(n) \ + const size_t len = s0.size(); \ + _Pragma("omp parallel for schedule(runtime)") \ + for( size_t i = 0 ; i < len ; i++ ) \ + InnerAlgebra::for_each##n( \ + BOOST_PP_ENUM_BINARY_PARAMS(n, s, [i] BOOST_PP_INTERCEPT) , \ + op \ + ); +BOOST_ODEINT_GEN_FOR_EACH(BOOST_ODEINT_GEN_BODY) +#undef BOOST_ODEINT_GEN_BODY + + + template< class NestedState > + static typename norm_result_type< typename NestedState::value_type >::type norm_inf( const NestedState &s ) + { + using std::max; + using std::abs; + typedef typename norm_result_type< typename NestedState::value_type >::type result_type; + result_type init = static_cast< result_type >( 0 ); +# pragma omp parallel for reduction(max: init) schedule(dynamic) + for( size_t i = 0 ; i < s.size() ; i++ ) + init = max( init , InnerAlgebra::norm_inf( s[i] ) ); + return init; + } + +}; + + +} +} +} + +#endif diff --git a/boost/numeric/odeint/external/openmp/openmp_range_algebra.hpp b/boost/numeric/odeint/external/openmp/openmp_range_algebra.hpp new file mode 100644 index 00000000..6ea9c5f4 --- /dev/null +++ b/boost/numeric/odeint/external/openmp/openmp_range_algebra.hpp @@ -0,0 +1,65 @@ +/* + [auto_generated] + 
boost/numeric/odeint/external/openmp/openmp_range_algebra.hpp + + [begin_description] + Range algebra for OpenMP. + [end_description] + + Copyright 2009-2011 Karsten Ahnert + Copyright 2009-2011 Mario Mulansky + + Distributed under the Boost Software License, Version 1.0. + (See accompanying file LICENSE_1_0.txt or + copy at http://www.boost.org/LICENSE_1_0.txt) + */ + + +#ifndef BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_RANGE_ALGEBRA_HPP_INCLUDED +#define BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_RANGE_ALGEBRA_HPP_INCLUDED + +#include +#include + +namespace boost { +namespace numeric { +namespace odeint { + +/** \brief OpenMP-parallelized range algebra. + * + * Requires a state with `s.size()` and `s[n]`, i.e. a Random Access Container. + */ +struct openmp_range_algebra +{ + +// FIXME: _Pragma is C++11. +#define BOOST_ODEINT_GEN_BODY(n) \ + const size_t len = s0.size(); \ + _Pragma("omp parallel for schedule(runtime)") \ + for( size_t i = 0 ; i < len ; i++ ) \ + op( BOOST_PP_ENUM_BINARY_PARAMS(n, s, [i] BOOST_PP_INTERCEPT) ); +BOOST_ODEINT_GEN_FOR_EACH(BOOST_ODEINT_GEN_BODY) +#undef BOOST_ODEINT_GEN_BODY + + + template< class S > + static typename norm_result_type< S >::type norm_inf( const S &s ) + { + using std::max; + using std::abs; + typedef typename norm_result_type< S >::type result_type; + result_type init = static_cast< result_type >( 0 ); +# pragma omp parallel for reduction(max: init) schedule(dynamic) + for( size_t i = 0 ; i < s.size() ; ++i ) + init = max( init , abs(s[i]) ); + return init; + } + +}; + + +} +} +} + +#endif diff --git a/boost/numeric/odeint/external/openmp/openmp_resize.hpp b/boost/numeric/odeint/external/openmp/openmp_resize.hpp deleted file mode 100644 index 9d307ce7..00000000 --- a/boost/numeric/odeint/external/openmp/openmp_resize.hpp +++ /dev/null @@ -1,87 +0,0 @@ -/* - [auto_generated] - boost/numeric/odeint/external/openmp/openmp_resize.hpp - - [begin_description] - Delegate resizing of OpenMP-state to inner state. 
- [end_description] - - Copyright 2009-2011 Karsten Ahnert - Copyright 2009-2011 Mario Mulansky - - Distributed under the Boost Software License, Version 1.0. - (See accompanying file LICENSE_1_0.txt or - copy at http://www.boost.org/LICENSE_1_0.txt) - */ - - -#ifndef BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_RESIZE_HPP_INCLUDED -#define BOOST_NUMERIC_ODEINT_EXTERNAL_OPENMP_OPENMP_RESIZE_HPP_INCLUDED - -#include -#include "openmp_state.hpp" - -namespace boost { -namespace numeric { -namespace odeint { - - -template< class InnerState > -struct is_resizeable< openmp_state< InnerState > > -{ - typedef typename is_resizeable< InnerState >::type type; - const static bool value = is_resizeable< InnerState >::value; -}; - - -template< class InnerState1, class InnerState2 > -struct same_size_impl< openmp_state< InnerState1 > , openmp_state< InnerState2 > > -{ - static bool same_size( const openmp_state< InnerState1 > &x , const openmp_state< InnerState2 > &y ) - { - if( x.size() != y.size() ) return false; - for( size_t i = 0 ; i != x.size() ; i++ ) - if( !same_size(x[i], y[i]) ) return false; - return true; - } -}; - - -template< class InnerStateOut, class InnerStateIn > -struct resize_impl< openmp_state< InnerStateOut > , openmp_state< InnerStateIn > > -{ - static void resize( openmp_state< InnerStateOut > &x , const openmp_state< InnerStateIn > &y ) - { - x.m_parts.resize( y.size() ); - x.offset.resize( y.size() ); -# pragma omp parallel for schedule(static,1) - for(size_t i = 0 ; i < x.size() ; i++) - boost::numeric::odeint::resize( x[i], y[i] ); - size_t off = 0; - for(size_t i = 0 ; i < x.size() ; i++) { - x.offset[i] = off; - off += x[i].size(); - } - } -}; - - -template< class InnerStateIn , class InnerStateOut > -struct copy_impl< openmp_state< InnerStateIn >, openmp_state< InnerStateOut > > -{ - static void copy( const openmp_state< InnerStateIn > &from, openmp_state< InnerStateOut > &to ) - { -# pragma omp parallel for schedule(static,1) - for(size_t i = 0 ; i 
< from.size() ; i++) - copy( from[i], to[i] ); - } -}; - - -} // odeint -} // numeric -} // boost - - -#endif // BOOST_NUMERIC_ODEINT_EXTERNAL_THRUST_THRUST_RESIZE_HPP_INCLUDED - diff --git a/boost/numeric/odeint/external/openmp/openmp_state.hpp b/boost/numeric/odeint/external/openmp/openmp_state.hpp index d04b066a..93143552 100644 --- a/boost/numeric/odeint/external/openmp/openmp_state.hpp +++ b/boost/numeric/odeint/external/openmp/openmp_state.hpp @@ -24,56 +24,142 @@ #include #include #include - +#include "openmp_nested_algebra.hpp" namespace boost { namespace numeric { namespace odeint { /** \brief A container that is split into distinct parts, for threading. + * Just a wrapper for vector<vector<T>> (outer vector = the parts, inner vectors = the data of each part), use `copy` for splitting/joining. */ -template< class InnerState > -struct openmp_state +template< class T > +struct openmp_state : public std::vector< std::vector< T > > { - std::vector< InnerState > m_parts; - std::vector< size_t > offset; - openmp_state() {} - template< class Container > - openmp_state( const Container &data ) - : m_parts( omp_get_num_threads() ) , offset( m_parts.size() ) + openmp_state(size_t n, const std::vector<T>& val = std::vector<T>()) /* n parts, each initialised as a copy of val */ + : std::vector< std::vector< T > >(n, val) {} + + template<class InputIterator> /* build the parts from an iterator range, forwarded to the outer vector */ + openmp_state(InputIterator first, InputIterator last) + : std::vector< std::vector< T > >(first, last) {} + + openmp_state(const std::vector< std::vector< T > > &orig) /* copy an already split container */ + : std::vector< std::vector< T > >(orig) {} + +}; + + + + +template< class T > +struct is_resizeable< openmp_state< T > > : boost::true_type { }; + + +template< class T > +struct same_size_impl< openmp_state< T > , openmp_state< T > > +{ + static bool same_size( const openmp_state< T > &x , const openmp_state< T > &y ) { - const size_t part = data.size() / m_parts.size(); -# pragma omp parallel for schedule(static,1) - for(size_t i = 0 ; i < m_parts.size() ; i++) { - const size_t start = i * part; - offset[i] = start; - const size_t end = (std::min)( (i + 1) * part, data.size() ); - resize( 
m_parts[i], boost::adaptors::slice(data, start, end) ); - boost::numeric::odeint::copy( boost::adaptors::slice(data, start, end), m_parts[i] ); - } + if( x.size() != y.size() ) return false; + for( size_t i = 0 ; i != x.size() ; i++ ) + if( x[i].size() != y[i].size() ) return false; + return true; } +}; - InnerState & operator[](size_t i) + +template< class T > +struct resize_impl< openmp_state< T > , openmp_state< T > > +{ + static void resize( openmp_state< T > &x , const openmp_state< T > &y ) { - return m_parts[i]; + x.resize( y.size() ); +# pragma omp parallel for schedule(dynamic) + for(size_t i = 0 ; i < x.size() ; i++) + x[i].resize( y[i].size() ); } +}; + - const InnerState & operator[](size_t i) const +/** \brief Copy data between openmp_states of same size. */ +template< class T > +struct copy_impl< openmp_state< T >, openmp_state< T > > +{ + static void copy( const openmp_state< T > &from, openmp_state< T > &to ) { - return m_parts[i]; +# pragma omp parallel for schedule(dynamic) + for(size_t i = 0 ; i < from.size() ; i++) + std::copy( from[i].begin() , from[i].end() , to.begin() ); } +}; + - size_t size() const + +/** \brief Copy data from some container to an openmp_state and resize it. + * Target container size will determine number of blocks to split into. + * If it is empty, it will be resized to the maximum number of OpenMP threads. + * SourceContainer must support `s::value_type`, `s::const_iterator`, `s.begin()`, `s.end()` and `s.size()`, + * with Random Access Iterators; i.e. it must be a Random Access Container. 
*/ +template< class SourceContainer > +struct copy_impl< SourceContainer, openmp_state< typename SourceContainer::value_type > > +{ + static void copy( const SourceContainer &from, openmp_state< typename SourceContainer::value_type > &to ) { - return m_parts.size(); + if(to.size() == 0) to.resize( omp_get_max_threads() ); + const size_t part = from.size() / to.size(); +# pragma omp parallel for schedule(dynamic) + for(size_t i = 0 ; i < to.size() ; i++) { + typedef typename SourceContainer::const_iterator it_t; + const it_t begin = from.begin() + i * part; + it_t end = begin + part; + // for cases where from.size() % to.size() > 0 + if(i + 1 == to.size() || end > from.end()) end = from.end(); + to[i].resize(end - begin); + std::copy(begin, end, to[i].begin()); + } } +}; +/** \brief Copy data from an openmp_state to some container and resize it. + * TargetContainer must support `s::value_type`, `s::iterator`, `s.begin()` and `s.resize(n)`, + * i.e. it must be a `std::vector`. */ +template< class TargetContainer > +struct copy_impl< openmp_state< typename TargetContainer::value_type >, TargetContainer > +{ + static void copy( const openmp_state< typename TargetContainer::value_type > &from , TargetContainer &to ) + { + // resize target + size_t total_size = 0; + for(size_t i = 0 ; i < from.size() ; i++) + total_size += from[i].size(); + to.resize( total_size ); + // copy parts + typename TargetContainer::iterator out = to.begin(); + for(size_t i = 0 ; i < from.size() ; i++) + out = std::copy(from[i].begin(), from[i].end(), out); + } }; + +/** \brief OpenMP-parallelized algebra. + * For use with openmp_state. + */ +typedef openmp_nested_algebra< range_algebra > openmp_algebra; + + + +/** \brief Use `openmp_algebra` for `openmp_state`. */ +template< class T > +struct algebra_dispatcher< openmp_state< T > > +{ + typedef openmp_algebra algebra_type; +}; + + } } }