From 855c58cbb5652dbcf424bc0f594a877e2d27d0ce Mon Sep 17 00:00:00 2001 From: Giulio Eulisse <10544+ktf@users.noreply.github.com> Date: Fri, 20 Dec 2024 17:31:55 +0100 Subject: [PATCH] Move a bunch of stuff out of line --- Tools/ML/CMakeLists.txt | 2 +- Tools/ML/MlResponse.cxx | 166 ++++++++++++++++++++++++++++++++++++++++ Tools/ML/MlResponse.h | 126 +++--------------------------- 3 files changed, 178 insertions(+), 116 deletions(-) create mode 100644 Tools/ML/MlResponse.cxx diff --git a/Tools/ML/CMakeLists.txt b/Tools/ML/CMakeLists.txt index b95c108584a..950ec3380ca 100644 --- a/Tools/ML/CMakeLists.txt +++ b/Tools/ML/CMakeLists.txt @@ -10,6 +10,6 @@ # or submit itself to any jurisdiction. o2physics_add_library(MLCore - SOURCES model.cxx PUBLIC_LINK_LIBRARIES O2::Framework O2Physics::AnalysisCore ONNXRuntime::ONNXRuntime + SOURCES model.cxx MlResponse.cxx ) diff --git a/Tools/ML/MlResponse.cxx b/Tools/ML/MlResponse.cxx new file mode 100644 index 00000000000..02eb57b215e --- /dev/null +++ b/Tools/ML/MlResponse.cxx @@ -0,0 +1,166 @@ +// Copyright 2019-2020 CERN and copyright holders of ALICE O2. +// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders. +// All rights not expressly granted are reserved. +// +// This software is distributed under the terms of the GNU General Public +// License v3 (GPL Version 3), copied verbatim in the file "COPYING". +// +// In applying this license CERN does not waive the privileges and immunities +// granted to it by virtue of its status as an Intergovernmental Organization +// or submit itself to any jurisdiction. 
+
+/// \file MlResponse.cxx
+/// \brief Class to compute the ML response for analysis selections
+#include "Tools/ML/MlResponse.h"
+#include "Tools/ML/model.h"
+#include "CCDB/CcdbApi.h"
+
+namespace o2::analysis
+{
+
+template <typename TypeOutputScore>
+void MlResponse<TypeOutputScore>::configure(const std::vector<double>& binsLimits, const o2::framework::LabeledArray<double>& cuts, const std::vector<int>& cutDir, const uint8_t& nClasses)
+{
+  if (cutDir.size() != nClasses) {
+    LOG(fatal) << "Number of classes (" << static_cast<int>(nClasses) << ") different from the number of cuts on model scores (" << cutDir.size() << ")! Please check your configurables.";
+  }
+
+  this->mBinsLimits = binsLimits;
+  mCuts = cuts;
+  mCutDir = cutDir;
+  mNClasses = nClasses;
+  mNModels = binsLimits.size() - 1;
+  this->mModels = std::vector<o2::ml::OnnxModel>(mNModels);
+  mPaths = std::vector<std::string>(mNModels);
+}
+
+/// Set model paths to CCDB
+/// \param onnxFiles is a vector of onnx file names, one for each bin
+/// \param ccdbApi is the CCDB API
+/// \param pathsCCDB is a vector of model paths in CCDB, one for each bin
+/// \param timestampCCDB is the CCDB timestamp
+/// \note On the CCDB, different models must be stored in different folders
+template <typename TypeOutputScore>
+void MlResponse<TypeOutputScore>::setModelPathsCCDB(const std::vector<std::string>& onnxFiles, const o2::ccdb::CcdbApi& ccdbApi, const std::vector<std::string>& pathsCCDB, int64_t timestampCCDB)
+{
+  if (onnxFiles.size() != mNModels) {
+    LOG(fatal) << "Number of expected models (" << mNModels << ") different from the one set (" << onnxFiles.size() << ")! Please check your configurables.";
+  }
+  if (pathsCCDB.size() != mNModels) {
+    LOG(fatal) << "Number of expected models (" << mNModels << ") different from the number of CCDB paths (" << pathsCCDB.size() << ")! Please check your configurables.";
+  }
+
+  // check that the path is unique for each BDT model (otherwise CCDB download does not work as expected)
+  for (auto iThisFile{0}; iThisFile < mNModels; ++iThisFile) {
+    for (auto iOtherFile{iThisFile + 1}; iOtherFile < mNModels; ++iOtherFile) {
+      if ((pathsCCDB[iThisFile] == pathsCCDB[iOtherFile]) && (onnxFiles[iThisFile] != onnxFiles[iOtherFile])) {
+        LOGP(fatal, "More than one model ({} and {}) in the same CCDB directory ({})! Each directory in CCDB can contain only one model. Please check your configurables.", onnxFiles[iThisFile], onnxFiles[iOtherFile], pathsCCDB[iThisFile]);
+      }
+    }
+  }
+
+  for (auto iFile{0}; iFile < mNModels; ++iFile) {
+    std::map<std::string, std::string> metadata;
+    bool retrieveSuccess = ccdbApi.retrieveBlob(pathsCCDB[iFile], ".", metadata, timestampCCDB, false, onnxFiles[iFile]);
+    if (retrieveSuccess) {
+      mPaths[iFile] = onnxFiles[iFile];
+    } else {
+      LOG(fatal) << "Error encountered while accessing the ML model from " << pathsCCDB[iFile] << "! Maybe the ML model doesn't exist yet for this run number or timestamp?";
+    }
+  }
+}
+/// Initialize class instance (initialize OnnxModels)
+/// \param enableOptimizations is a switch to enable optimizations
+/// \param threads is the number of active threads
+template <typename TypeOutputScore>
+void MlResponse<TypeOutputScore>::init(bool enableOptimizations, int threads)
+{
+  uint8_t counterModel{0};
+  for (const auto& path : mPaths) {
+    this->mModels[counterModel].initModel(path, enableOptimizations, threads);
+    ++counterModel;
+  }
+}
+
+template <typename TypeOutputScore>
+template <typename T1, typename T2>
+std::vector<TypeOutputScore> MlResponse<TypeOutputScore>::getModelOutput(T1& input, const T2& nModel)
+{
+  if (nModel < 0 || static_cast<std::size_t>(nModel) >= this->mModels.size()) {
+    LOG(fatal) << "Model index " << nModel << " is out of range! The number of initialised models is " << this->mModels.size() << ". Please check your configurables.";
+  }
+
+  TypeOutputScore* outputPtr = this->mModels[nModel].evalModel(input);
+  return std::vector<TypeOutputScore>{outputPtr, outputPtr + mNClasses};
+}
+
+template <typename TypeOutputScore>
+template <typename T1, typename T2>
+bool MlResponse<TypeOutputScore>::isSelectedMl(T1& input, const T2& candVar)
+{
+  int nModel = findBin(candVar);
+  auto output = getModelOutput(input, nModel);
+  uint8_t iClass{0};
+  for (const auto& outputValue : output) {
+    uint8_t dir = mCutDir.at(iClass);
+    if (dir != o2::cuts_ml::CutDirection::CutNot) {
+      if (dir == o2::cuts_ml::CutDirection::CutGreater && outputValue > mCuts.get(nModel, iClass)) {
+        return false;
+      }
+      if (dir == o2::cuts_ml::CutDirection::CutSmaller && outputValue < mCuts.get(nModel, iClass)) {
+        return false;
+      }
+    }
+    ++iClass;
+  }
+  return true;
+}
+
+template <typename TypeOutputScore>
+template <typename T1, typename T2>
+bool MlResponse<TypeOutputScore>::isSelectedMl(T1& input, const T2& candVar, std::vector<TypeOutputScore>& output)
+{
+  int nModel = findBin(candVar);
+  output = getModelOutput(input, nModel);
+  uint8_t iClass{0};
+  for (const auto& outputValue : output) {
+    uint8_t dir = mCutDir.at(iClass);
+    if (dir != o2::cuts_ml::CutDirection::CutNot) {
+      if (dir == o2::cuts_ml::CutDirection::CutGreater && outputValue > mCuts.get(nModel, iClass)) {
+        return false;
+      }
+      if (dir == o2::cuts_ml::CutDirection::CutSmaller && outputValue < mCuts.get(nModel, iClass)) {
+        return false;
+      }
+    }
+    ++iClass;
+  }
+  return true;
+}
+
+template <typename TypeOutputScore>
+void MlResponse<TypeOutputScore>::cacheInputFeaturesIndices(std::vector<std::string> const& cfgInputFeatures)
+{
+  setAvailableInputFeatures();
+  for (const auto& inputFeature : cfgInputFeatures) {
+    if (mAvailableInputFeatures.count(inputFeature)) {
+      mCachedIndices.emplace_back(mAvailableInputFeatures[inputFeature]);
+    } else {
+      LOG(fatal) << "Input feature " << inputFeature << " not available! Please check your configurables.";
+    }
+  }
+}
+
+template <typename TypeOutputScore>
+void MlResponse<TypeOutputScore>::setModelPathsLocal(const std::vector<std::string>& onnxFiles)
+{
+  if (onnxFiles.size() != mNModels) {
+    LOG(fatal) << "Number of expected models (" << mNModels << ") different from the one set (" << onnxFiles.size() << ")! Please check your configurables.";
+  }
+  mPaths = onnxFiles;
+}
+
+template bool MlResponse<float>::isSelectedMl<std::vector<float>, float>(std::vector<float>&, const float&, std::vector<float>&);
+template class MlResponse<float>;
+template class MlResponse<double>;
+} // namespace o2::analysis
diff --git a/Tools/ML/MlResponse.h b/Tools/ML/MlResponse.h
index 127512e52ee..49847982efe 100644
--- a/Tools/ML/MlResponse.h
+++ b/Tools/ML/MlResponse.h
@@ -61,20 +61,7 @@ class MlResponse
   /// \param cuts is a LabeledArray containing selections per bin
   /// \param cutDir is a vector telling whether to reject score values greater or smaller than the threshold
   /// \param nClasses is the number of classes for each model
-  void configure(const std::vector<double>& binsLimits, const o2::framework::LabeledArray<double>& cuts, const std::vector<int>& cutDir, const uint8_t& nClasses)
-  {
-    if (cutDir.size() != nClasses) {
-      LOG(fatal) << "Number of classes (" << static_cast<int>(nClasses) << ") different from the number of cuts on model scores (" << cutDir.size() << ")! Please check your configurables.";
-    }
-
-    mBinsLimits = binsLimits;
-    mCuts = cuts;
-    mCutDir = cutDir;
-    mNClasses = nClasses;
-    mNModels = binsLimits.size() - 1;
-    mModels = std::vector<o2::ml::OnnxModel>(mNModels);
-    mPaths = std::vector<std::string>(mNModels);
-  }
+  void configure(const std::vector<double>& binsLimits, const o2::framework::LabeledArray<double>& cuts, const std::vector<int>& cutDir, const uint8_t& nClasses);
 
   /// Set model paths to CCDB
   /// \param onnxFiles is a vector of onnx file names, one for each bin
@@ -82,110 +69,34 @@ class MlResponse
   /// \param pathsCCDB is a vector of model paths in CCDB, one for each bin
   /// \param timestampCCDB is the CCDB timestamp
   /// \note On the CCDB, different models must be stored in different folders
-  void setModelPathsCCDB(const std::vector<std::string>& onnxFiles, const o2::ccdb::CcdbApi& ccdbApi, const std::vector<std::string>& pathsCCDB, int64_t timestampCCDB)
-  {
-    if (onnxFiles.size() != mNModels) {
-      LOG(fatal) << "Number of expected models (" << mNModels << ") different from the one set (" << onnxFiles.size() << ")! Please check your configurables.";
-    }
-    if (pathsCCDB.size() != mNModels) {
-      LOG(fatal) << "Number of expected models (" << mNModels << ") different from the number of CCDB paths (" << pathsCCDB.size() << ")! Please check your configurables.";
-    }
-
-    // check that the path is unique for each BDT model (otherwise CCDB download does not work as expected)
-    for (auto iThisFile{0}; iThisFile < mNModels; ++iThisFile) {
-      for (auto iOtherFile{iThisFile + 1}; iOtherFile < mNModels; ++iOtherFile) {
-        if ((pathsCCDB[iThisFile] == pathsCCDB[iOtherFile]) && (onnxFiles[iThisFile] != onnxFiles[iOtherFile])) {
-          LOGP(fatal, "More than one model ({} and {}) in the same CCDB directory ({})! Each directory in CCDB can contain only one model. Please check your configurables.", onnxFiles[iThisFile], onnxFiles[iOtherFile], pathsCCDB[iThisFile]);
-        }
-      }
-    }
-
-    for (auto iFile{0}; iFile < mNModels; ++iFile) {
-      std::map<std::string, std::string> metadata;
-      bool retrieveSuccess = ccdbApi.retrieveBlob(pathsCCDB[iFile], ".", metadata, timestampCCDB, false, onnxFiles[iFile]);
-      if (retrieveSuccess) {
-        mPaths[iFile] = onnxFiles[iFile];
-      } else {
-        LOG(fatal) << "Error encountered while accessing the ML model from " << pathsCCDB[iFile] << "! Maybe the ML model doesn't exist yet for this run number or timestamp?";
-      }
-    }
-  }
+  void setModelPathsCCDB(const std::vector<std::string>& onnxFiles, const o2::ccdb::CcdbApi& ccdbApi, const std::vector<std::string>& pathsCCDB, int64_t timestampCCDB);
 
   /// Set model paths to local or cvmfs
   /// \param onnxFiles is a vector of onnx file names, one for each bin
-  void setModelPathsLocal(const std::vector<std::string>& onnxFiles)
-  {
-    if (onnxFiles.size() != mNModels) {
-      LOG(fatal) << "Number of expected models (" << mNModels << ") different from the one set (" << onnxFiles.size() << ")! Please check your configurables.";
-    }
-    mPaths = onnxFiles;
-  }
+  void setModelPathsLocal(const std::vector<std::string>& onnxFiles);
 
   /// Initialize class instance (initialize OnnxModels)
   /// \param enableOptimizations is a switch to enable optimizations
   /// \param threads is the number of active threads
-  void init(bool enableOptimizations = false, int threads = 0)
-  {
-    uint8_t counterModel{0};
-    for (const auto& path : mPaths) {
-      mModels[counterModel].initModel(path, enableOptimizations, threads);
-      ++counterModel;
-    }
-  }
+  void init(bool enableOptimizations = false, int threads = 0);
 
   /// Method to translate configurable input-feature strings into integers
   /// \param cfgInputFeatures array of input features names
-  void cacheInputFeaturesIndices(std::vector<std::string> const& cfgInputFeatures)
-  {
-    setAvailableInputFeatures();
-    for (const auto& inputFeature : cfgInputFeatures) {
-      if (mAvailableInputFeatures.count(inputFeature)) {
-        mCachedIndices.emplace_back(mAvailableInputFeatures[inputFeature]);
-      } else {
-        LOG(fatal) << "Input feature " << inputFeature << " not available! Please check your configurables.";
-      }
-    }
-  }
+  void cacheInputFeaturesIndices(std::vector<std::string> const& cfgInputFeatures);
 
   /// Get vector with model predictions
   /// \param input a vector containing the values of features used in the model
   /// \param nModel is the model index
   /// \return model prediction for each class and the selected model
   template <typename T1, typename T2>
-  std::vector<TypeOutputScore> getModelOutput(T1& input, const T2& nModel)
-  {
-    if (nModel < 0 || static_cast<std::size_t>(nModel) >= mModels.size()) {
-      LOG(fatal) << "Model index " << nModel << " is out of range! The number of initialised models is " << mModels.size() << ". Please check your configurables.";
-    }
-
-    TypeOutputScore* outputPtr = mModels[nModel].evalModel(input);
-    return std::vector<TypeOutputScore>{outputPtr, outputPtr + mNClasses};
-  }
+  std::vector<TypeOutputScore> getModelOutput(T1& input, const T2& nModel);
 
   /// ML selections
   /// \param input is the input features
   /// \param candVar is the variable value (e.g. pT) used to select which model to use
   /// \return boolean telling if model predictions pass the cuts
   template <typename T1, typename T2>
-  bool isSelectedMl(T1& input, const T2& candVar)
-  {
-    int nModel = findBin(candVar);
-    auto output = getModelOutput(input, nModel);
-    uint8_t iClass{0};
-    for (const auto& outputValue : output) {
-      uint8_t dir = mCutDir.at(iClass);
-      if (dir != o2::cuts_ml::CutDirection::CutNot) {
-        if (dir == o2::cuts_ml::CutDirection::CutGreater && outputValue > mCuts.get(nModel, iClass)) {
-          return false;
-        }
-        if (dir == o2::cuts_ml::CutDirection::CutSmaller && outputValue < mCuts.get(nModel, iClass)) {
-          return false;
-        }
-      }
-      ++iClass;
-    }
-    return true;
-  }
+  bool isSelectedMl(T1& input, const T2& candVar);
 
   /// ML selections
   /// \param input is the input features
@@ -193,25 +104,7 @@ class MlResponse
   /// \param candVar is the variable value (e.g. pT) used to select which model to use
   /// \param output is a container to be filled with model output
   /// \return boolean telling if model predictions pass the cuts
   template <typename T1, typename T2>
-  bool isSelectedMl(T1& input, const T2& candVar, std::vector<TypeOutputScore>& output)
-  {
-    int nModel = findBin(candVar);
-    output = getModelOutput(input, nModel);
-    uint8_t iClass{0};
-    for (const auto& outputValue : output) {
-      uint8_t dir = mCutDir.at(iClass);
-      if (dir != o2::cuts_ml::CutDirection::CutNot) {
-        if (dir == o2::cuts_ml::CutDirection::CutGreater && outputValue > mCuts.get(nModel, iClass)) {
-          return false;
-        }
-        if (dir == o2::cuts_ml::CutDirection::CutSmaller && outputValue < mCuts.get(nModel, iClass)) {
-          return false;
-        }
-      }
-      ++iClass;
-    }
-    return true;
-  }
+  bool isSelectedMl(T1& input, const T2& candVar, std::vector<TypeOutputScore>& output);
 
 protected:
   std::vector<o2::ml::OnnxModel> mModels; // OnnxModel objects, one for each bin
@@ -244,6 +137,9 @@ class MlResponse
   }
 };
 
+extern template class MlResponse<float>;
+extern template class MlResponse<double>;
+
 } // namespace analysis
 } // namespace o2