Skip to content

Commit

Permalink
Merge pull request #64 from lcorcodilos/dev
Browse files Browse the repository at this point in the history
Small fixes and the DeepAK8 top tagging module
  • Loading branch information
lcorcodilos authored Apr 15, 2021
2 parents fc6e68a + 77c3c55 commit dbb83af
Show file tree
Hide file tree
Showing 155 changed files with 5,237 additions and 899 deletions.
69 changes: 51 additions & 18 deletions TIMBER/Analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def GetCollectionNames(self):
Returns:
list(str): Collection names.
'''
return self._collectionOrg.collectionDict.keys()
return self._collectionOrg.GetCollectionNames()

def SetActiveNode(self,node):
'''Sets the active node.
Expand Down Expand Up @@ -817,6 +817,7 @@ def MakeWeightCols(self,name='',node=None,correctionNames=None,dropList=[],corre
if otherCorrection not in countedByCorrelation:
countedByCorrelation.append(otherCorrection)

if corr.GetType() == 'corr': continue
weights[corrname+'_up'] = weights['nominal']
weights[corrname+'_down'] = weights['nominal']

Expand Down Expand Up @@ -874,19 +875,23 @@ def GetWeightName(self,corr,variation,name=""):
raise NameError("The weight name `%s` does not exist in the current columns. Are you sure the correction has been made and MakeWeightCols has been called?"%weightname)
return weightname

def MakeTemplateHistos(self,templateHist,variables,node=None):
def MakeTemplateHistos(self,templateHist,variables,node=None,lazy=True):
'''Generates the uncertainty template histograms based on the weights created by #MakeWeightCols().
@param templateHist (TH1,TH2,TH3): A TH1, TH2, or TH3 used as a template to create the histograms.
@param templateHist (TH1,TH2,TH3,tuple): A TH1, TH2, TH3, or a tuple describing the TH* options.
Used as a template to create the histograms.
@param variables ([str]): A list of the columns/variables to plot (ex. ["x","y","z"]).
@param node (Node): Node to plot histograms from. Defaults to #ActiveNode.
@param lazy (bool): Make the action lazy which, in this case, means skipping the axis title
naming. The axis names will be saved in meta data of the returned group (Group.item_meta).
If using HistGroup.Do(), the axis titles will later be applied automatically.
Returns:
HistGroup: Uncertainty template histograms.
'''
if node == None: node = self.ActiveNode

weight_cols = [str(cname) for cname in node.DataFrame.GetColumnNames() if 'weight__' in str(cname)]
weight_cols = [str(cname) for cname in node.DataFrame.GetColumnNames() if str(cname).startswith('weight_')]
baseName = templateHist.GetName()
baseTitle = templateHist.GetTitle()
binningTuple,dimension = GetHistBinningTuple(templateHist)
Expand All @@ -903,24 +908,31 @@ def MakeTemplateHistos(self,templateHist,variables,node=None):

if dimension == 1:
thishist = node.DataFrame.Histo1D(template_attr,variables[0],cname)
meta_data = {"xtitle":variables[0]}
elif dimension == 2:
thishist = node.DataFrame.Histo2D(template_attr,variables[0],variables[1],cname)
meta_data = {"xtitle":variables[0], "ytitle":variables[1]}
elif dimension == 3:
thishist = node.DataFrame.Histo3D(template_attr,variables[0],variables[1],variables[2],cname)
meta_data = {"xtitle":variables[0], "ytitle":variables[1], "ztitle":variables[2]}

out.Add(histname,thishist)
if lazy:
out.Add(histname,thishist,meta_data)
else:
out.Add(histname,thishist)

# Wait to GetValue and SetTitle so that the histogram filling happens simultaneously
for k in out.keys():
if dimension == 1:
out[k].GetXaxis().SetTitle(variables[0])
elif dimension == 2:
out[k].GetXaxis().SetTitle(variables[0])
out[k].GetYaxis().SetTitle(variables[1])
elif dimension == 3:
out[k].GetXaxis().SetTitle(variables[0])
out[k].GetYaxis().SetTitle(variables[1])
out[k].GetZaxis().SetTitle(variables[2])
if not lazy:
for k in out.keys():
if dimension == 1:
out[k].GetXaxis().SetTitle(variables[0])
elif dimension == 2:
out[k].GetXaxis().SetTitle(variables[0])
out[k].GetYaxis().SetTitle(variables[1])
elif dimension == 3:
out[k].GetXaxis().SetTitle(variables[0])
out[k].GetYaxis().SetTitle(variables[1])
out[k].GetZaxis().SetTitle(variables[2])

return out

Expand All @@ -941,7 +953,7 @@ def DrawTemplates(self,hGroup,saveLocation,projection='X',projectionArgs=(),file
canvas = ROOT.TCanvas('c','',800,700)

# Initial setup
baseName = list(hGroup.keys())[0].split('__')[0]
baseName = [n for n in list(hGroup.keys()) if n.endswith('__nominal')][0].replace('__nominal','')

if isinstance(hGroup[baseName+'__nominal'],ROOT.TH2):
projectedGroup = hGroup.Do("Projection"+projection.upper(),projectionArgs)
Expand All @@ -958,7 +970,7 @@ def DrawTemplates(self,hGroup,saveLocation,projection='X',projectionArgs=(),file

corrections = []
for name in projectedGroup.keys():
corr = name.split('__')[1].replace('_up','').replace('_down','')
corr = name.split('__')[-1].replace('_up','').replace('_down','')
if corr not in corrections and corr != "nominal":
corrections.append(corr)

Expand Down Expand Up @@ -1543,16 +1555,22 @@ def __init__(self, name):
# string
#
# Group type - "cut", "var", "hist"
## @var item_meta
# OrderedDict()
#
# Storage container for generic meta information on the group.
super(Group, self).__init__()
self.name = name
self.items = OrderedDict()
self.type = None
self.item_meta = OrderedDict()

def Add(self,name,item,makeCopy=False):
def Add(self,name,item,meta={},makeCopy=False):
'''Add item to Group with a name. Modifies in-place if makeCopy == False.
@param name (str): Name/key for added item.
@param item (obj): Item to add.
@param meta (dict): Meta information to store in item_meta alongside the added item. Defaults to {}.
@param makeCopy (bool, optional): Creates a copy of the group with the item added.
Returns:
Expand All @@ -1565,9 +1583,11 @@ def Add(self,name,item,makeCopy=False):
elif self.type == 'var': newGroup = VarGroup(self.name+'+'+name)
elif self.type == 'cut': newGroup = CutGroup(self.name+'+'+name)
newGroup.items = added
newGroup.item_meta = meta
return newGroup
else:
self.items[name] = item
self.item_meta[name] = meta

def Drop(self,name,makeCopy=False):
'''Drop item from Group with provided name/key. Modifies in-place if makeCopy == False.
Expand Down Expand Up @@ -1724,7 +1744,16 @@ def Do(self,THmethod,argsTuple=()):
returnNone = False
# Loop over hists
for name,hist in self.items.items():
# Handle lazy axis naming
if 'xtitle' in self.item_meta.keys():
hist.GetXaxis().SetTitle(self.item_meta['xtitle'])
if 'ytitle' in self.item_meta.keys():
hist.GetYaxis().SetTitle(self.item_meta['ytitle'])
if 'ztitle' in self.item_meta.keys():
hist.GetZaxis().SetTitle(self.item_meta['ztitle'])

out = getattr(hist,THmethod)(*argsTuple)

# If None type, set returnNone = True
if out == None and returnNone == False: returnNone = True
# If return is not None, add
Expand Down Expand Up @@ -1772,6 +1801,8 @@ def __init__(self,name,script,constructor=[],mainFunc='eval',columnList=None,isC
Defaults to None and the standard NanoAOD columns from LoadColumnNames() will be used.
@param isClone (bool, optional): For internal use when cloning. Defaults to False. If True, will
not duplicate compile the same script if two functions are needed in one C++ script.
@param cloneFuncInfo (dict, optional): For internal use when cloning. Defaults to None. Should be the
_funcInfo from the object from which this one is cloned.
'''

## @var name
Expand Down Expand Up @@ -2068,6 +2099,8 @@ def __init__(self,name,script,constructor=[],mainFunc='eval',corrtype=None,colum
Defaults to None and the standard NanoAOD columns from LoadColumnNames() will be used.
@param isClone (bool, optional): For internal use when cloning. Defaults to False. If True, will
not duplicate compile the same script if two functions are needed in one C++ script.
@param cloneFuncInfo (dict, optional): For internal use when cloning. Defaults to None. Should be the
_funcInfo from the object from which this one is cloned.
'''

super(Correction,self).__init__(name,script,constructor,mainFunc,columnList,isClone,cloneFuncInfo)
Expand Down
63 changes: 35 additions & 28 deletions TIMBER/CollectionOrganizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,22 +13,22 @@ def __init__(self, rdf):
@param rdf (RDataFrame): RDataFrame from which to organize.
'''
self.baseBranches = [str(b) for b in rdf.GetColumnNames()]
self.generateFromRDF(rdf)
self.builtCollections = []
self._baseBranches = [str(b) for b in rdf.GetColumnNames()]
self._generateFromRDF(rdf)
self._builtCollections = []

def generateFromRDF(self, rdf):
def _generateFromRDF(self, rdf):
'''Generate the collection from the RDataFrame.
@param rdf (RDataFrame): RDataFrame from which to organize.
'''
self.collectionDict = {}
self.otherBranches = {}
self._collectionDict = {}
self._otherBranches = {}

for b in self.baseBranches:
for b in self._baseBranches:
self.AddBranch(b,rdf.GetColumnType(b))

def parsetype(self, t):
def _parsetype(self, t):
'''Deduce the type that TIMBER needs to see for the
collection structs. If t is an RVec, deduce the internal type
of the RVec.
Expand Down Expand Up @@ -58,8 +58,16 @@ def AddCollection(self, c):
@param c (str): Collection name only.
'''
if c not in self.collectionDict.keys():
self.collectionDict[c] = {'alias': False}
if c not in self._collectionDict.keys():
self._collectionDict[c] = {'alias': False}

def GetCollectionNames(self):
'''Return the list of all collection names.
Returns:
list(str): All tracked collection names.
'''
return self._collectionDict.keys()

def GetCollectionAttributes(self, c):
'''Get all attributes of a collection. Example, for the 'Electron'
Expand All @@ -70,7 +78,7 @@ def GetCollectionAttributes(self, c):
Returns:
list(str): List of attributes for the collection.
'''
return [c for c in self.collectionDict[c] if c != 'alias']
return [c for c in self._collectionDict[c] if c != 'alias']

def AddBranch(self, b, btype=''):
'''Add a branch to track. Will deduce if it is in a collection
Expand All @@ -82,16 +90,16 @@ def AddBranch(self, b, btype=''):
'''
collname = b.split('_')[0]
varname = '_'.join(b.split('_')[1:])
typeStr = self.parsetype(btype)
typeStr = self._parsetype(btype)

if typeStr == False or varname == '' or 'n'+collname not in self.baseBranches:
self.otherBranches[b] = {
if typeStr == False or varname == '' or 'n'+collname not in self._baseBranches:
self._otherBranches[b] = {
'type': typeStr,
'alias': False
}
elif varname != '':
self.AddCollection(collname)
self.collectionDict[collname][varname] = {
self._collectionDict[collname][varname] = {
'type': typeStr,
'alias': False
}
Expand All @@ -107,16 +115,16 @@ def Alias(self, alias, name):
ValueError: Entries do not exist so an alias cannot be added.
'''
# Name is either in otherBranches, is a collection name, or is a full name <collection>_<attr>
if name in self.otherBranches.keys():
self.otherBranches[name]['alias'] = alias
elif name in self.collectionDict.keys():
self.collectionDict[name]['alias'] = alias
if name in self._otherBranches.keys():
self._otherBranches[name]['alias'] = alias
elif name in self._collectionDict.keys():
self._collectionDict[name]['alias'] = alias
else:
collname = name.split('_')[0]
varname = '_'.join(name.split('_')[1:])
if collname in self.collectionDict.keys():
if varname in self.collectionDict[collname].keys():
self.collectionDict[collname][varname]['alias'] = alias
if collname in self._collectionDict.keys():
if varname in self._collectionDict[collname].keys():
self._collectionDict[collname][varname]['alias'] = alias
else:
raise ValueError('Cannot add alias `%s` because attribute `%s` does not exist in collection `%s`'%(alias,varname,collname))
else:
Expand All @@ -139,10 +147,10 @@ def BuildCppCollection(self,collection,node,silent=True):
newNode = node
attributes = []
for aname in self.GetCollectionAttributes(collection):
attributes.append('%s %s'%(self.collectionDict[collection][aname]['type'], aname))
attributes.append('%s %s'%(self._collectionDict[collection][aname]['type'], aname))

if collection+'s' not in self.builtCollections:
self.builtCollections.append(collection+'s')
if collection+'s' not in self._builtCollections:
self._builtCollections.append(collection+'s')
CompileCpp(StructDef(collection,attributes))
newNode = newNode.Define(collection+'s', StructObj(collection,attributes),silent=silent)
else:
Expand All @@ -162,8 +170,8 @@ def CollectionDefCheck(self, action_str, node):
Node: Manipulated node with the C++ struct built (the action string is not applied though).
'''
newNode = node
for c in self.collectionDict.keys():
if re.search(r"\b" + re.escape(c+'s') + r"\b", action_str) and (c+'s' not in self.builtCollections):
for c in self._collectionDict.keys():
if re.search(r"\b" + re.escape(c+'s') + r"\b", action_str) and (c+'s' not in self._builtCollections):
print ('MAKING %ss for %s'%(c,action_str))
newNode = self.BuildCppCollection(c,newNode,silent=True)
return newNode
Expand Down Expand Up @@ -214,7 +222,6 @@ def StructObj(collectionName, varList):
return {0}s;
'''
attr_assignment_str = ''
print (varList)
for i,v in enumerate(varList):
varname = v.split(' ')[-1]
attr_assignment_str += '{0}_{1}[i],'.format(collectionName, varname)
Expand Down
40 changes: 40 additions & 0 deletions TIMBER/Framework/include/DeepAK8_helper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#ifndef _TIMBER_DEEPAK8_HELPER
#define _TIMBER_DEEPAK8_HELPER
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

namespace DeepAK8_helper_detail {
/**
 * @brief Build the full path of the DeepAK8 scale factor CSV file.
 *
 * @param timberPath Value of the TIMBERPATH environment variable
 *  (null pointer if the variable is not set).
 * @return std::string timberPath joined with
 *  "TIMBER/data/OfficialSFs/DeepAK8V2_Top_W_SFs.csv". A '/' separator is
 *  inserted if a non-empty timberPath does not already end with one.
 * @throws std::runtime_error if timberPath is a null pointer.
 */
inline std::string ScaleFactorCsvPath(const char* timberPath) {
    // std::getenv returns a null pointer for an unset variable and building a
    // std::string from a null pointer is undefined behaviour, so fail loudly.
    if (timberPath == nullptr) {
        throw std::runtime_error(
            "DeepAK8_helper: environment variable TIMBERPATH is not set");
    }
    std::string path(timberPath);
    // Tolerate a TIMBERPATH that was exported without a trailing slash.
    if (!path.empty() && path.back() != '/') {
        path += '/';
    }
    path += "TIMBER/data/OfficialSFs/DeepAK8V2_Top_W_SFs.csv";
    return path;
}
} // namespace DeepAK8_helper_detail

/**
 * @brief C++ class to access scale factors associated with DeepAK8 tagging.
 */
class DeepAK8_helper {
    private:
        // NOTE(review): presumably the row label searched for in the CSV; it is
        // set by the constructor, whose definition is not visible here.
        std::string entry_to_find;
        // NOTE(review): presumably the numeric rows read from the CSV - confirm
        // against the constructor implementation.
        std::vector<std::vector<float> > _values;
        // Location of the scale factor CSV, resolved relative to $TIMBERPATH.
        std::string _p = DeepAK8_helper_detail::ScaleFactorCsvPath(std::getenv("TIMBERPATH"));

    public:
        /**
         * @brief Construct a new DeepAK8_helper object
         *
         * @param particle Either "Top" or "W"
         * @param year
         * @param workingpoint Ex. "0p5"
         * @param massDecorr
         * @throws std::runtime_error if the TIMBERPATH environment variable is not set.
         */
        DeepAK8_helper(std::string particle, int year, std::string workingpoint, bool massDecorr);
        ~DeepAK8_helper() {}
        /**
         * @brief Lookup the scale factor and variations based on the AK8 jet momentum.
         * Returned values are absolute {nominal, up, down}.
         *
         * @param pt
         * @return std::vector<float> {nominal, up, down} (absolute)
         */
        std::vector<float> eval(float pt);
};
#endif
23 changes: 23 additions & 0 deletions TIMBER/Framework/include/GenMatching.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,17 @@ class Particle {
* @param idx Child index
*/
void AddChild(int idx);
/**
* @brief Return the index of the parent.
*
* @return int
*/
int GetParent();
/**
* @brief Get vector of indices of the children.
*
* @return std::vector<int>
*/
std::vector<int> GetChild();
/**
* @brief Calculate \f$\Delta R\f$ between current particle and input vector.
Expand Down Expand Up @@ -123,13 +133,26 @@ class GenParticleTree
Particle _noneParticle;

public:
/**
* @brief Construct a new GenParticleTree object
*
* @param nParticles
*/
GenParticleTree(int nParticles);
/**
* @brief Add particle to tree.
*
* @param particle
*/
Particle* AddParticle(Particle particle);
/**
* @brief Add particle to tree.
*
* @tparam T GenPartStruct
* @param index Index of the particle in the GenPart collection
* @param p A GenPartStruct from TIMBER
* @return Particle*
*/
template <class T>
Particle* AddParticle(int index, T p) {
Particle particle(index, p);
Expand Down
Loading

0 comments on commit dbb83af

Please sign in to comment.