From 009014027bcd51aa9ae7bdaa1997cd40c9db53a8 Mon Sep 17 00:00:00 2001 From: Joachim Jenke Date: Wed, 17 Jan 2024 14:52:50 +0100 Subject: [PATCH 1/3] Fix data race in nestkernel/per_thread_bool_indicator.cpp --- nestkernel/connection_manager.cpp | 4 +- nestkernel/event_delivery_manager.cpp | 16 ++++---- nestkernel/per_thread_bool_indicator.cpp | 49 ++++++++---------------- nestkernel/per_thread_bool_indicator.h | 33 +++++++++++++++- nestkernel/source_table.h | 6 +-- 5 files changed, 62 insertions(+), 46 deletions(-) diff --git a/nestkernel/connection_manager.cpp b/nestkernel/connection_manager.cpp index 6dd7c012fb..6fbfd04d07 100644 --- a/nestkernel/connection_manager.cpp +++ b/nestkernel/connection_manager.cpp @@ -876,13 +876,13 @@ nest::ConnectionManager::connect_( Node& source, { #pragma omp atomic write has_primary_connections_ = true; - check_primary_connections_[ tid ].set_true(); + check_primary_connections_.set_true( tid ); } else if ( check_secondary_connections_[ tid ].is_false() and not is_primary ) { #pragma omp atomic write secondary_connections_exist_ = true; - check_secondary_connections_[ tid ].set_true(); + check_secondary_connections_.set_true( tid ); } } diff --git a/nestkernel/event_delivery_manager.cpp b/nestkernel/event_delivery_manager.cpp index 4979b10b5e..cca5270577 100644 --- a/nestkernel/event_delivery_manager.cpp +++ b/nestkernel/event_delivery_manager.cpp @@ -790,7 +790,7 @@ EventDeliveryManager::gather_target_data( const size_t tid ) assert( not kernel().connection_manager.is_source_table_cleared() ); // assume all threads have some work to do - gather_completed_checker_[ tid ].set_false(); + gather_completed_checker_.set_false( tid ); assert( gather_completed_checker_.all_false() ); const AssignedRanks assigned_ranks = kernel().vp_manager.get_assigned_ranks( tid ); @@ -802,7 +802,7 @@ EventDeliveryManager::gather_target_data( const size_t tid ) { // assume this is the last gather round and change to false // otherwise - gather_completed_checker_[ tid ].set_true(); + gather_completed_checker_.set_true( tid ); #pragma omp master { @@ -819,7 +819,7 @@ EventDeliveryManager::gather_target_data( const size_t tid ) assigned_ranks, kernel().mpi_manager.get_send_recv_count_target_data_per_rank() ); const bool gather_completed = collocate_target_data_buffers_( tid, assigned_ranks, send_buffer_position ); - gather_completed_checker_[ tid ].logical_and( gather_completed ); + gather_completed_checker_.logical_and( tid, gather_completed ); if ( gather_completed_checker_.all_true() ) { @@ -842,7 +842,7 @@ EventDeliveryManager::gather_target_data( const size_t tid ) #pragma omp barrier const bool distribute_completed = distribute_target_data_buffers_( tid ); - gather_completed_checker_[ tid ].logical_and( distribute_completed ); + gather_completed_checker_.logical_and( tid, distribute_completed ); // resize mpi buffers, if necessary and allowed if ( gather_completed_checker_.any_false() and kernel().mpi_manager.adaptive_target_buffers() ) @@ -864,7 +864,7 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) assert( not kernel().connection_manager.is_source_table_cleared() ); // assume all threads have some work to do - gather_completed_checker_[ tid ].set_false(); + gather_completed_checker_.set_false( tid ); assert( gather_completed_checker_.all_false() ); const AssignedRanks assigned_ranks = kernel().vp_manager.get_assigned_ranks( tid ); @@ -874,7 +874,7 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) while ( gather_completed_checker_.any_false() ) { // assume this is the last gather round and change to false otherwise - gather_completed_checker_[ tid ].set_true(); + gather_completed_checker_.set_true( tid ); #pragma omp master { @@ -891,7 +891,7 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) const bool gather_completed = collocate_target_data_buffers_compressed_( tid, assigned_ranks, send_buffer_position ); - gather_completed_checker_[ tid ].logical_and( gather_completed ); + gather_completed_checker_.logical_and( tid, gather_completed ); if ( gather_completed_checker_.all_true() ) { @@ -916,7 +916,7 @@ EventDeliveryManager::gather_target_data_compressed( const size_t tid ) // all data it is responsible for to buffers. Now combine with information on whether other ranks // have sent all their data. Note: All threads will return the same value for distribute_completed. const bool distribute_completed = distribute_target_data_buffers_( tid ); - gather_completed_checker_[ tid ].logical_and( distribute_completed ); + gather_completed_checker_.logical_and( tid, distribute_completed ); // resize mpi buffers, if necessary and allowed if ( gather_completed_checker_.any_false() and kernel().mpi_manager.adaptive_target_buffers() ) diff --git a/nestkernel/per_thread_bool_indicator.cpp b/nestkernel/per_thread_bool_indicator.cpp index 2028574ff4..1bf13345e3 100644 --- a/nestkernel/per_thread_bool_indicator.cpp +++ b/nestkernel/per_thread_bool_indicator.cpp @@ -50,62 +50,47 @@ PerThreadBoolIndicator::initialize( const size_t num_threads, const bool status kernel().vp_manager.assert_single_threaded(); per_thread_status_.clear(); per_thread_status_.resize( num_threads, BoolIndicatorUInt64( status ) ); + size_ = num_threads; + if ( status ) + are_true_ = num_threads; + else + are_true_ = 0; } bool PerThreadBoolIndicator::all_false() const { #pragma omp barrier - for ( auto it = per_thread_status_.begin(); it < per_thread_status_.end(); ++it ) - { - if ( it->is_true() ) - { - return false; - } - } - return true; + bool ret = ( are_true_ == 0 ); +#pragma omp barrier + return ret; } bool PerThreadBoolIndicator::all_true() const { #pragma omp barrier - for ( auto it = per_thread_status_.begin(); it < per_thread_status_.end(); ++it ) - { - if ( it->is_false() ) - { - return false; - } - } - return true; + bool ret = ( are_true_ == size_ ); +#pragma omp barrier + return ret; } bool PerThreadBoolIndicator::any_false() const { #pragma omp barrier - for ( auto it = per_thread_status_.begin(); it < per_thread_status_.end(); ++it ) - { - if ( it->is_false() ) - { - return true; - } - } - return false; + bool ret = ( are_true_ < size_ ); +#pragma omp barrier + return ret; } bool PerThreadBoolIndicator::any_true() const { #pragma omp barrier - for ( auto it = per_thread_status_.begin(); it < per_thread_status_.end(); ++it ) - { - if ( it->is_true() ) - { - return true; - } - } - return false; + bool ret = ( are_true_ > 0 ); +#pragma omp barrier + return ret; } } // namespace nest diff --git a/nestkernel/per_thread_bool_indicator.h b/nestkernel/per_thread_bool_indicator.h index 9072d22952..c8636a88ae 100644 --- a/nestkernel/per_thread_bool_indicator.h +++ b/nestkernel/per_thread_bool_indicator.h @@ -52,15 +52,17 @@ class BoolIndicatorUInt64 bool is_true() const; bool is_false() const; + +protected: void set_true(); void set_false(); - void logical_and( const bool status ); private: static constexpr std::uint_fast64_t true_uint64 = true; static constexpr std::uint_fast64_t false_uint64 = false; std::uint_fast64_t status_; + friend class PerThreadBoolIndicator; }; inline bool @@ -106,6 +108,34 @@ class PerThreadBoolIndicator BoolIndicatorUInt64& operator[]( const size_t tid ); + void + set_true( const size_t tid ) + { + if ( per_thread_status_[ tid ].is_false() ) + { + are_true_++; + per_thread_status_[ tid ].set_true(); + } + } + void + set_false( const size_t tid ) + { + if ( per_thread_status_[ tid ].is_true() ) + { + are_true_--; + per_thread_status_[ tid ].set_false(); + } + } + void + logical_and( const size_t tid, const bool status ) + { + if ( per_thread_status_[ tid ].is_true() && !status ) + { + are_true_--; + per_thread_status_[ tid ].set_false(); + } + } + /** * Resize to the given number of threads and set all elements to false. */ @@ -133,6 +163,7 @@ class PerThreadBoolIndicator private: std::vector< BoolIndicatorUInt64 > per_thread_status_; + std::atomic< int > size_ { 0 }, are_true_ { 0 }; }; } // namespace nest diff --git a/nestkernel/source_table.h b/nestkernel/source_table.h index 0ada191e85..ac5574ca6e 100644 --- a/nestkernel/source_table.h +++ b/nestkernel/source_table.h @@ -372,7 +372,7 @@ SourceTable::clear( const size_t tid ) it->clear(); } sources_[ tid ].clear(); - is_cleared_[ tid ].set_true(); + is_cleared_.set_true( tid ); } inline void @@ -412,7 +412,7 @@ SourceTable::save_entry_point( const size_t tid ) assert( current_positions_[ tid ].lcid == -1 ); saved_positions_[ tid ].lcid = -1; } - saved_entry_point_[ tid ].set_true(); + saved_entry_point_.set_true( tid ); } } @@ -420,7 +420,7 @@ inline void SourceTable::restore_entry_point( const size_t tid ) { current_positions_[ tid ] = saved_positions_[ tid ]; - saved_entry_point_[ tid ].set_false(); + saved_entry_point_.set_false( tid ); } inline void From 5a01bd53eb5a4b25b4f292d56cb8b8abc63011a0 Mon Sep 17 00:00:00 2001 From: Joachim Date: Wed, 6 Mar 2024 10:45:08 +0100 Subject: [PATCH 2/3] Apply suggestions from code review Co-authored-by: Hans Ekkehard Plesser --- nestkernel/per_thread_bool_indicator.cpp | 6 ++++++ nestkernel/per_thread_bool_indicator.h | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/nestkernel/per_thread_bool_indicator.cpp b/nestkernel/per_thread_bool_indicator.cpp index 1bf13345e3..105b7ca2c6 100644 --- a/nestkernel/per_thread_bool_indicator.cpp +++ b/nestkernel/per_thread_bool_indicator.cpp @@ -52,14 +52,20 @@ PerThreadBoolIndicator::initialize( const size_t num_threads, const bool status per_thread_status_.resize( num_threads, BoolIndicatorUInt64( status ) ); size_ = num_threads; if ( status ) + { are_true_ = num_threads; + } else + { are_true_ = 0; + } } bool PerThreadBoolIndicator::all_false() const { +// We need two barriers here to ensure that no thread can continue and change the result +// before all threads have determined the result. #pragma omp barrier bool ret = ( are_true_ == 0 ); #pragma omp barrier diff --git a/nestkernel/per_thread_bool_indicator.h b/nestkernel/per_thread_bool_indicator.h index c8636a88ae..b5178002d1 100644 --- a/nestkernel/per_thread_bool_indicator.h +++ b/nestkernel/per_thread_bool_indicator.h @@ -117,6 +117,7 @@ class PerThreadBoolIndicator per_thread_status_[ tid ].set_true(); } } + void set_false( const size_t tid ) { @@ -126,10 +127,11 @@ class PerThreadBoolIndicator per_thread_status_[ tid ].set_false(); } } + void logical_and( const size_t tid, const bool status ) { - if ( per_thread_status_[ tid ].is_true() && !status ) + if ( per_thread_status_[ tid ].is_true() and not status ) { are_true_--; per_thread_status_[ tid ].set_false(); @@ -163,7 +165,15 @@ class PerThreadBoolIndicator private: std::vector< BoolIndicatorUInt64 > per_thread_status_; - std::atomic< int > size_ { 0 }, are_true_ { 0 }; + std::atomic< int > size_ { 0 }; + + /** Number of per-thread indicators currently true + * + * are_true_ == 0 -> all are false + * are_true_ == size_ -> all are true + * 0 < are_true_ < size_ -> some true, some false + */ + std::atomic< int > are_true_ { 0 }; }; } // namespace nest From 4b185a4a020e021abe48d2ec85071c65168a1e02 Mon Sep 17 00:00:00 2001 From: Joachim Jenke Date: Wed, 6 Mar 2024 10:47:01 +0100 Subject: [PATCH 3/3] Two small fixes --- nestkernel/per_thread_bool_indicator.cpp | 2 ++ nestkernel/per_thread_bool_indicator.h | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/nestkernel/per_thread_bool_indicator.cpp b/nestkernel/per_thread_bool_indicator.cpp index 105b7ca2c6..267f753298 100644 --- a/nestkernel/per_thread_bool_indicator.cpp +++ b/nestkernel/per_thread_bool_indicator.cpp @@ -67,6 +67,8 @@ PerThreadBoolIndicator::all_false() const // We need two barriers here to ensure that no thread can continue and change the result // before all threads have determined the result. #pragma omp barrier + // We need two barriers here to ensure that no thread can continue and change the result + // before all threads have determined the result. bool ret = ( are_true_ == 0 ); #pragma omp barrier return ret; diff --git a/nestkernel/per_thread_bool_indicator.h b/nestkernel/per_thread_bool_indicator.h index b5178002d1..bafd79a307 100644 --- a/nestkernel/per_thread_bool_indicator.h +++ b/nestkernel/per_thread_bool_indicator.h @@ -24,6 +24,7 @@ #define PER_THREAD_BOOL_INDICATOR_H // C++ includes: +#include #include #include #include @@ -165,8 +166,8 @@ class PerThreadBoolIndicator private: std::vector< BoolIndicatorUInt64 > per_thread_status_; - std::atomic< int > size_ { 0 }; - + int size_ { 0 }; + /** Number of per-thread indicators currently true * * are_true_ == 0 -> all are false