Move a bunch of stuff out of line #9089

Open · wants to merge 1 commit into master
2 changes: 1 addition & 1 deletion Tools/ML/CMakeLists.txt
@@ -10,6 +10,6 @@
# or submit itself to any jurisdiction.

o2physics_add_library(MLCore
-                      SOURCES model.cxx
+                      SOURCES model.cxx MlResponse.cxx
                       PUBLIC_LINK_LIBRARIES O2::Framework O2Physics::AnalysisCore ONNXRuntime::ONNXRuntime
)
166 changes: 166 additions & 0 deletions Tools/ML/MlResponse.cxx
@@ -0,0 +1,166 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.

[O2 linter warning, line 1, doc/file: Provide mandatory file documentation.]
[O2 linter warning, line 1, name/workflow-file: Name of a workflow file must match the name of the main struct in it (without the PWG prefix). (Class implementation files should be in "Core" directories.)]
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file MlResponse.cxx
/// \brief Class to compute the ML response for analysis selections

[O2 linter warning, line 13, doc/file: Documentation for \file is missing, incorrect or misplaced.]
[O2 linter warning, line 13, doc/file: Documentation for \author is missing, incorrect or misplaced.]
#include "Tools/ML/MlResponse.h"
#include "Tools/ML/model.h"
#include "CCDB/CcdbApi.h"

namespace o2::analysis
{

template <typename TypeOutputScore>
void MlResponse<TypeOutputScore>::configure(const std::vector<double>& binsLimits, const o2::framework::LabeledArray<double>& cuts, const std::vector<int>& cutDir, const uint8_t& nClasses)
{
  if (cutDir.size() != nClasses) {
    LOG(fatal) << "Number of classes (" << static_cast<int>(nClasses) << ") different from the number of cuts on model scores (" << cutDir.size() << ")! Please check your configurables.";
  }

  mBinsLimits = binsLimits;
  mCuts = cuts;
  mCutDir = cutDir;
  mNClasses = nClasses;
  mNModels = binsLimits.size() - 1;
  mModels = std::vector<o2::ml::OnnxModel>(mNModels);
  mPaths = std::vector<std::string>(mNModels);
}

/// Set model paths to CCDB
/// \param onnxFiles is a vector of onnx file names, one for each bin
/// \param ccdbApi is the CCDB API
/// \param pathsCCDB is a vector of model paths in CCDB, one for each bin
/// \param timestampCCDB is the CCDB timestamp
/// \note On the CCDB, different models must be stored in different folders
template <typename TypeOutputScore>
void MlResponse<TypeOutputScore>::setModelPathsCCDB(const std::vector<std::string>& onnxFiles, const o2::ccdb::CcdbApi& ccdbApi, const std::vector<std::string>& pathsCCDB, int64_t timestampCCDB)
{
  if (onnxFiles.size() != mNModels) {
    LOG(fatal) << "Number of expected models (" << mNModels << ") different from the one set (" << onnxFiles.size() << ")! Please check your configurables.";
  }
  if (pathsCCDB.size() != mNModels) {
    LOG(fatal) << "Number of expected models (" << mNModels << ") different from the number of CCDB paths (" << pathsCCDB.size() << ")! Please check your configurables.";
  }

  // check that the path is unique for each BDT model (otherwise CCDB download does not work as expected)
  for (auto iThisFile{0}; iThisFile < mNModels; ++iThisFile) {
    for (auto iOtherFile{iThisFile + 1}; iOtherFile < mNModels; ++iOtherFile) {
      if ((pathsCCDB[iThisFile] == pathsCCDB[iOtherFile]) && (onnxFiles[iThisFile] != onnxFiles[iOtherFile])) {
        LOGP(fatal, "More than one model ({} and {}) in the same CCDB directory ({})! Each directory in CCDB can contain only one model. Please check your configurables.", onnxFiles[iThisFile], onnxFiles[iOtherFile], pathsCCDB[iThisFile]);
      }
    }
  }

  for (auto iFile{0}; iFile < mNModels; ++iFile) {
    std::map<std::string, std::string> metadata;
    bool retrieveSuccess = ccdbApi.retrieveBlob(pathsCCDB[iFile], ".", metadata, timestampCCDB, false, onnxFiles[iFile]);
    if (retrieveSuccess) {
      mPaths[iFile] = onnxFiles[iFile];
    } else {
      LOG(fatal) << "Error encountered while accessing the ML model from " << pathsCCDB[iFile] << "! Maybe the ML model doesn't exist yet for this run number or timestamp?";
    }
  }
}

/// Initialize class instance (initialize OnnxModels)
/// \param enableOptimizations is a switch to enable optimizations
/// \param threads is the number of active threads
template <typename TypeOutputScore>
void MlResponse<TypeOutputScore>::init(bool enableOptimizations, int threads)
{
  uint8_t counterModel{0};
  for (const auto& path : mPaths) {
    mModels[counterModel].initModel(path, enableOptimizations, threads);
    ++counterModel;
  }
}

template <typename TypeOutputScore>
template <typename T1, typename T2>
std::vector<TypeOutputScore> MlResponse<TypeOutputScore>::getModelOutput(T1& input, const T2& nModel)
{
  if (nModel < 0 || static_cast<std::size_t>(nModel) >= mModels.size()) {
    LOG(fatal) << "Model index " << nModel << " is out of range! The number of initialised models is " << mModels.size() << ". Please check your configurables.";
  }

  TypeOutputScore* outputPtr = mModels[nModel].evalModel(input);
  return std::vector<TypeOutputScore>{outputPtr, outputPtr + mNClasses};
}

template <typename TypeOutputScore>
template <typename T1, typename T2>
bool MlResponse<TypeOutputScore>::isSelectedMl(T1& input, const T2& candVar)
{
  int nModel = findBin(candVar);
  auto output = getModelOutput(input, nModel);
  uint8_t iClass{0};
  for (const auto& outputValue : output) {
    uint8_t dir = mCutDir.at(iClass);
    if (dir != o2::cuts_ml::CutDirection::CutNot) {
      if (dir == o2::cuts_ml::CutDirection::CutGreater && outputValue > mCuts.get(nModel, iClass)) {
        return false;
      }
      if (dir == o2::cuts_ml::CutDirection::CutSmaller && outputValue < mCuts.get(nModel, iClass)) {
        return false;
      }
    }
    ++iClass;
  }
  return true;
}

template <typename TypeOutputScore>
template <typename T1, typename T2>
bool MlResponse<TypeOutputScore>::isSelectedMl(T1& input, const T2& candVar, std::vector<TypeOutputScore>& output)
{
  int nModel = findBin(candVar);
  output = getModelOutput(input, nModel);
  uint8_t iClass{0};
  for (const auto& outputValue : output) {
    uint8_t dir = mCutDir.at(iClass);
    if (dir != o2::cuts_ml::CutDirection::CutNot) {
      if (dir == o2::cuts_ml::CutDirection::CutGreater && outputValue > mCuts.get(nModel, iClass)) {
        return false;
      }
      if (dir == o2::cuts_ml::CutDirection::CutSmaller && outputValue < mCuts.get(nModel, iClass)) {
        return false;
      }
    }
    ++iClass;
  }
  return true;
}

template <typename TypeOutputScore>
void MlResponse<TypeOutputScore>::cacheInputFeaturesIndices(std::vector<std::string> const& cfgInputFeatures)
{
  setAvailableInputFeatures();
  for (const auto& inputFeature : cfgInputFeatures) {
    if (mAvailableInputFeatures.count(inputFeature)) {
      mCachedIndices.emplace_back(mAvailableInputFeatures[inputFeature]);
    } else {
      LOG(fatal) << "Input feature " << inputFeature << " not available! Please check your configurables.";
    }
  }
}

template <typename TypeOutputScore>
void MlResponse<TypeOutputScore>::setModelPathsLocal(const std::vector<std::string>& onnxFiles)
{
  if (onnxFiles.size() != mNModels) {
    LOG(fatal) << "Number of expected models (" << mNModels << ") different from the one set (" << onnxFiles.size() << ")! Please check your configurables.";
  }
  mPaths = onnxFiles;
}

template bool MlResponse<float>::isSelectedMl<std::vector<float>, float>(std::vector<float>&, const float&, std::vector<float>&);
template class MlResponse<float>;
template class MlResponse<double>;
} // namespace o2::analysis
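
For reference, a minimal usage sketch of the class whose implementation moves out of line here (not part of the PR). The bin limits, file names, cut directions, and feature values below are illustrative assumptions; in a real task the cuts array is filled from a Configurable, and analyses typically use a derived response class that defines the available input features, so direct instantiation of the base is assumed here only for brevity.

#include <string>
#include <vector>
#include "Tools/ML/MlResponse.h"

void exampleUsage()
{
  // Two pT bins -> two models, two output classes per model (illustrative values).
  std::vector<double> binsPt{1.0, 2.0, 4.0};
  std::vector<std::string> onnxFiles{"model_pt1.onnx", "model_pt2.onnx"};
  std::vector<int> cutDir{o2::cuts_ml::CutDirection::CutGreater,   // reject if background score is above threshold
                          o2::cuts_ml::CutDirection::CutSmaller};  // reject if signal score is below threshold
  o2::framework::LabeledArray<double> cuts{}; // thresholds per (bin, class); filled from a Configurable in practice

  o2::analysis::MlResponse<float> mlResponse;
  mlResponse.configure(binsPt, cuts, cutDir, 2);
  mlResponse.setModelPathsLocal(onnxFiles); // or setModelPathsCCDB(...) to download the files first
  mlResponse.init();

  // Per candidate: the second argument (e.g. pT) selects the model and cut row.
  std::vector<float> inputFeatures{0.12f, 3.4f};
  std::vector<float> scores{};
  bool selected = mlResponse.isSelectedMl(inputFeatures, 1.7f, scores);
  (void)selected;
}

Note that the three-argument isSelectedMl call above is exactly the member-template specialization instantiated at the end of MlResponse.cxx (isSelectedMl<std::vector<float>, float>); other argument-type combinations would need their own explicit instantiations there in order to link.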
126 changes: 11 additions & 115 deletions Tools/ML/MlResponse.h
@@ -61,157 +61,50 @@ class MlResponse
/// \param cuts is a LabeledArray containing selections per bin
/// \param cutDir is a vector telling whether to reject score values greater or smaller than the threshold
/// \param nClasses is the number of classes for each model
-void configure(const std::vector<double>& binsLimits, const o2::framework::LabeledArray<double>& cuts, const std::vector<int>& cutDir, const uint8_t& nClasses)
-{
-  if (cutDir.size() != nClasses) {
-    LOG(fatal) << "Number of classes (" << static_cast<int>(nClasses) << ") different from the number of cuts on model scores (" << cutDir.size() << ")! Please check your configurables.";
-  }
-
-  mBinsLimits = binsLimits;
-  mCuts = cuts;
-  mCutDir = cutDir;
-  mNClasses = nClasses;
-  mNModels = binsLimits.size() - 1;
-  mModels = std::vector<o2::ml::OnnxModel>(mNModels);
-  mPaths = std::vector<std::string>(mNModels);
-}
+void configure(const std::vector<double>& binsLimits, const o2::framework::LabeledArray<double>& cuts, const std::vector<int>& cutDir, const uint8_t& nClasses);

/// Set model paths to CCDB
/// \param onnxFiles is a vector of onnx file names, one for each bin
/// \param ccdbApi is the CCDB API
/// \param pathsCCDB is a vector of model paths in CCDB, one for each bin
/// \param timestampCCDB is the CCDB timestamp
/// \note On the CCDB, different models must be stored in different folders
-void setModelPathsCCDB(const std::vector<std::string>& onnxFiles, const o2::ccdb::CcdbApi& ccdbApi, const std::vector<std::string>& pathsCCDB, int64_t timestampCCDB)
-{
-  if (onnxFiles.size() != mNModels) {
-    LOG(fatal) << "Number of expected models (" << mNModels << ") different from the one set (" << onnxFiles.size() << ")! Please check your configurables.";
-  }
-  if (pathsCCDB.size() != mNModels) {
-    LOG(fatal) << "Number of expected models (" << mNModels << ") different from the number of CCDB paths (" << pathsCCDB.size() << ")! Please check your configurables.";
-  }
-
-  // check that the path is unique for each BDT model (otherwise CCDB download does not work as expected)
-  for (auto iThisFile{0}; iThisFile < mNModels; ++iThisFile) {
-    for (auto iOtherFile{iThisFile + 1}; iOtherFile < mNModels; ++iOtherFile) {
-      if ((pathsCCDB[iThisFile] == pathsCCDB[iOtherFile]) && (onnxFiles[iThisFile] != onnxFiles[iOtherFile])) {
-        LOGP(fatal, "More than one model ({} and {}) in the same CCDB directory ({})! Each directory in CCDB can contain only one model. Please check your configurables.", onnxFiles[iThisFile], onnxFiles[iOtherFile], pathsCCDB[iThisFile]);
-      }
-    }
-  }
-
-  for (auto iFile{0}; iFile < mNModels; ++iFile) {
-    std::map<std::string, std::string> metadata;
-    bool retrieveSuccess = ccdbApi.retrieveBlob(pathsCCDB[iFile], ".", metadata, timestampCCDB, false, onnxFiles[iFile]);
-    if (retrieveSuccess) {
-      mPaths[iFile] = onnxFiles[iFile];
-    } else {
-      LOG(fatal) << "Error encountered while accessing the ML model from " << pathsCCDB[iFile] << "! Maybe the ML model doesn't exist yet for this run number or timestamp?";
-    }
-  }
-}
+void setModelPathsCCDB(const std::vector<std::string>& onnxFiles, const o2::ccdb::CcdbApi& ccdbApi, const std::vector<std::string>& pathsCCDB, int64_t timestampCCDB);

/// Set model paths to local or cvmfs
/// \param onnxFiles is a vector of onnx file names, one for each bin
-void setModelPathsLocal(const std::vector<std::string>& onnxFiles)
-{
-  if (onnxFiles.size() != mNModels) {
-    LOG(fatal) << "Number of expected models (" << mNModels << ") different from the one set (" << onnxFiles.size() << ")! Please check your configurables.";
-  }
-  mPaths = onnxFiles;
-}
+void setModelPathsLocal(const std::vector<std::string>& onnxFiles);

/// Initialize class instance (initialize OnnxModels)
/// \param enableOptimizations is a switch to enable optimizations
/// \param threads is the number of active threads
-void init(bool enableOptimizations = false, int threads = 0)
-{
-  uint8_t counterModel{0};
-  for (const auto& path : mPaths) {
-    mModels[counterModel].initModel(path, enableOptimizations, threads);
-    ++counterModel;
-  }
-}
+void init(bool enableOptimizations = false, int threads = 0);

/// Method to translate configurable input-feature strings into integers
/// \param cfgInputFeatures array of input features names
-void cacheInputFeaturesIndices(std::vector<std::string> const& cfgInputFeatures)
-{
-  setAvailableInputFeatures();
-  for (const auto& inputFeature : cfgInputFeatures) {
-    if (mAvailableInputFeatures.count(inputFeature)) {
-      mCachedIndices.emplace_back(mAvailableInputFeatures[inputFeature]);
-    } else {
-      LOG(fatal) << "Input feature " << inputFeature << " not available! Please check your configurables.";
-    }
-  }
-}
+void cacheInputFeaturesIndices(std::vector<std::string> const& cfgInputFeatures);

/// Get vector with model predictions
/// \param input a vector containing the values of features used in the model
/// \param nModel is the model index
/// \return model prediction for each class and the selected model
template <typename T1, typename T2>
-std::vector<TypeOutputScore> getModelOutput(T1& input, const T2& nModel)
-{
-  if (nModel < 0 || static_cast<std::size_t>(nModel) >= mModels.size()) {
-    LOG(fatal) << "Model index " << nModel << " is out of range! The number of initialised models is " << mModels.size() << ". Please check your configurables.";
-  }
-
-  TypeOutputScore* outputPtr = mModels[nModel].evalModel(input);
-  return std::vector<TypeOutputScore>{outputPtr, outputPtr + mNClasses};
-}
+std::vector<TypeOutputScore> getModelOutput(T1& input, const T2& nModel);

/// ML selections
/// \param input is the input features
/// \param candVar is the variable value (e.g. pT) used to select which model to use
/// \return boolean telling if model predictions pass the cuts
template <typename T1, typename T2>
-bool isSelectedMl(T1& input, const T2& candVar)
-{
-  int nModel = findBin(candVar);
-  auto output = getModelOutput(input, nModel);
-  uint8_t iClass{0};
-  for (const auto& outputValue : output) {
-    uint8_t dir = mCutDir.at(iClass);
-    if (dir != o2::cuts_ml::CutDirection::CutNot) {
-      if (dir == o2::cuts_ml::CutDirection::CutGreater && outputValue > mCuts.get(nModel, iClass)) {
-        return false;
-      }
-      if (dir == o2::cuts_ml::CutDirection::CutSmaller && outputValue < mCuts.get(nModel, iClass)) {
-        return false;
-      }
-    }
-    ++iClass;
-  }
-  return true;
-}
+bool isSelectedMl(T1& input, const T2& candVar);

/// ML selections
/// \param input is the input features
/// \param candVar is the variable value (e.g. pT) used to select which model to use
/// \param output is a container to be filled with model output
/// \return boolean telling if model predictions pass the cuts
template <typename T1, typename T2>
-bool isSelectedMl(T1& input, const T2& candVar, std::vector<TypeOutputScore>& output)
-{
-  int nModel = findBin(candVar);
-  output = getModelOutput(input, nModel);
-  uint8_t iClass{0};
-  for (const auto& outputValue : output) {
-    uint8_t dir = mCutDir.at(iClass);
-    if (dir != o2::cuts_ml::CutDirection::CutNot) {
-      if (dir == o2::cuts_ml::CutDirection::CutGreater && outputValue > mCuts.get(nModel, iClass)) {
-        return false;
-      }
-      if (dir == o2::cuts_ml::CutDirection::CutSmaller && outputValue < mCuts.get(nModel, iClass)) {
-        return false;
-      }
-    }
-    ++iClass;
-  }
-  return true;
-}
+bool isSelectedMl(T1& input, const T2& candVar, std::vector<TypeOutputScore>& output);

protected:
std::vector<o2::ml::OnnxModel> mModels; // OnnxModel objects, one for each bin
@@ -244,6 +137,9 @@ class MlResponse
}
};

+extern template class MlResponse<float>;
+extern template class MlResponse<double>;

} // namespace analysis
} // namespace o2
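
The two extern template declarations added above are the other half of the out-of-line move: they tell every translation unit that includes MlResponse.h not to instantiate MlResponse<float> and MlResponse<double> itself, while the explicit instantiations at the bottom of MlResponse.cxx (now compiled into MLCore via the CMakeLists change) emit the definitions exactly once. A minimal self-contained sketch of the pattern, with illustrative names not taken from the PR:

// widget.h
#pragma once

template <typename T>
struct Widget {
  T twice(T x); // declared here, defined out of line in widget.cxx

  template <typename U>
  U plus(U x, T y); // member template: not covered by the class instantiation
};

extern template struct Widget<float>;  // suppress implicit instantiation in includers
extern template struct Widget<double>;

// widget.cxx
#include "widget.h"

template <typename T>
T Widget<T>::twice(T x)
{
  return x + x;
}

template <typename T>
template <typename U>
U Widget<T>::plus(U x, T y)
{
  return x + static_cast<U>(y);
}

// Emit the definitions once, here. `template struct Widget<float>;` covers
// twice() but NOT the member template, so plus() needs its own line, just as
// MlResponse.cxx explicitly instantiates isSelectedMl<std::vector<float>, float>.
template float Widget<float>::plus<float>(float, float);
template struct Widget<float>;
template struct Widget<double>;

Since an explicit class instantiation does not cover member templates, each member-template specialization that callers need must be instantiated separately; any combination not listed in the .cxx compiles at the call site but fails at link time, so the set of instantiations there has to track what analyses actually use.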
