Merge pull request #64 from lcorcodilos/dev

Small fixes and the DeepAK8 top tagging module
lcorcodilos · Apr 15, 2021 · dbb83af · dbb83af
2 parents fc6e68a + 77c3c55
commit dbb83af
Show file tree

Hide file tree

Showing 155 changed files with 5,237 additions and 899 deletions.
diff --git a/TIMBER/Analyzer.py b/TIMBER/Analyzer.py
@@ -251,7 +251,7 @@ def GetCollectionNames(self):
         Returns:
             list(str): Collection names.
         '''
-        return self._collectionOrg.collectionDict.keys()
+        return self._collectionOrg.GetCollectionNames()
 
     def SetActiveNode(self,node):
         '''Sets the active node.
@@ -817,6 +817,7 @@ def MakeWeightCols(self,name='',node=None,correctionNames=None,dropList=[],corre
                         if otherCorrection not in countedByCorrelation:
                             countedByCorrelation.append(otherCorrection)
 
+            if corr.GetType() == 'corr': continue
             weights[corrname+'_up'] = weights['nominal']
             weights[corrname+'_down'] = weights['nominal']
 
@@ -874,19 +875,23 @@ def GetWeightName(self,corr,variation,name=""):
             raise NameError("The weight name `%s` does not exist in the current columns. Are you sure the correction has been made and MakeWeightCols has been called?"%weightname)
         return weightname
 
-    def MakeTemplateHistos(self,templateHist,variables,node=None):
+    def MakeTemplateHistos(self,templateHist,variables,node=None,lazy=True):
         '''Generates the uncertainty template histograms based on the weights created by #MakeWeightCols(). 
 
-        @param templateHist (TH1,TH2,TH3): A TH1, TH2, or TH3 used as a template to create the histograms.
+        @param templateHist (TH1,TH2,TH3,tuple): A TH1, TH2, TH3, or a tuple describing the TH* options.
+            Used as a template to create the histograms.
         @param variables ([str]): A list of the columns/variables to plot (ex. ["x","y","z"]).
         @param node (Node): Node to plot histograms from. Defaults to #ActiveNode.
+        @param lazy (bool): Make the action lazy which, in this case, means skipping the axis title
+            naming. The axis names will be saved in meta data of the returned group (Group.item_meta).
+            If using HistGroup.Do(), the axis titles will later be applied automatically.
 
         Returns:
             HistGroup: Uncertainty template histograms.
         '''
         if node == None: node = self.ActiveNode
 
-        weight_cols = [str(cname) for cname in node.DataFrame.GetColumnNames() if 'weight__' in str(cname)]
+        weight_cols = [str(cname) for cname in node.DataFrame.GetColumnNames() if str(cname).startswith('weight_')]
         baseName = templateHist.GetName()
         baseTitle = templateHist.GetTitle()
         binningTuple,dimension = GetHistBinningTuple(templateHist)
@@ -903,24 +908,31 @@ def MakeTemplateHistos(self,templateHist,variables,node=None):
 
             if dimension == 1: 
                 thishist = node.DataFrame.Histo1D(template_attr,variables[0],cname)
+                meta_data = {"xtitle":variables[0]}
             elif dimension == 2: 
                 thishist = node.DataFrame.Histo2D(template_attr,variables[0],variables[1],cname)
+                meta_data = {"xtitle":variables[0], "ytitle":variables[1]}
             elif dimension == 3: 
                 thishist = node.DataFrame.Histo3D(template_attr,variables[0],variables[1],variables[2],cname)
+                meta_data = {"xtitle":variables[0], "ytitle":variables[1], "ztitle":variables[2]}
 
-            out.Add(histname,thishist)
+            if lazy:
+                out.Add(histname,thishist,meta_data)
+            else:
+                out.Add(histname,thishist)
 
         # Wait to GetValue and SetTitle so that the histogram filling happens simultaneously
-        for k in out.keys():
-            if dimension == 1: 
-                out[k].GetXaxis().SetTitle(variables[0])
-            elif dimension == 2: 
-                out[k].GetXaxis().SetTitle(variables[0])
-                out[k].GetYaxis().SetTitle(variables[1])
-            elif dimension == 3: 
-                out[k].GetXaxis().SetTitle(variables[0])
-                out[k].GetYaxis().SetTitle(variables[1])
-                out[k].GetZaxis().SetTitle(variables[2])
+        if not lazy:
+            for k in out.keys():
+                if dimension == 1: 
+                    out[k].GetXaxis().SetTitle(variables[0])
+                elif dimension == 2: 
+                    out[k].GetXaxis().SetTitle(variables[0])
+                    out[k].GetYaxis().SetTitle(variables[1])
+                elif dimension == 3: 
+                    out[k].GetXaxis().SetTitle(variables[0])
+                    out[k].GetYaxis().SetTitle(variables[1])
+                    out[k].GetZaxis().SetTitle(variables[2])
 
         return out
 
@@ -941,7 +953,7 @@ def DrawTemplates(self,hGroup,saveLocation,projection='X',projectionArgs=(),file
         canvas = ROOT.TCanvas('c','',800,700)
 
         # Initial setup
-        baseName = list(hGroup.keys())[0].split('__')[0]
+        baseName = [n for n in list(hGroup.keys()) if n.endswith('__nominal')][0].replace('__nominal','')
 
         if isinstance(hGroup[baseName+'__nominal'],ROOT.TH2):
             projectedGroup = hGroup.Do("Projection"+projection.upper(),projectionArgs)
@@ -958,7 +970,7 @@ def DrawTemplates(self,hGroup,saveLocation,projection='X',projectionArgs=(),file
 
         corrections = []
         for name in projectedGroup.keys():
-            corr = name.split('__')[1].replace('_up','').replace('_down','')
+            corr = name.split('__')[-1].replace('_up','').replace('_down','')
             if corr not in corrections and corr != "nominal":
                 corrections.append(corr)
 
@@ -1543,16 +1555,22 @@ def __init__(self, name):
         # string
         #
         # Group type - "cut", "var", "hist"
+        ## @var item_meta
+        # OrderedDict()
+        #
+        # Storage container for generic meta information on the group.
         super(Group, self).__init__()
         self.name = name
         self.items = OrderedDict()
         self.type = None
+        self.item_meta = OrderedDict()
 
-    def Add(self,name,item,makeCopy=False):
+    def Add(self,name,item,meta={},makeCopy=False):
         '''Add item to Group with a name. Modifies in-place if copy == False.
 
         @param name (str): Name/key for added item.
         @param item (obj): Item to add.
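+        @param meta (dict, optional): Generic meta information to store with the item (see item_meta). Defaults to an empty dict.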
         @param makeCopy (bool, optional): Creates a copy of the group with the item added.
 
         Returns:
@@ -1565,9 +1583,11 @@ def Add(self,name,item,makeCopy=False):
             elif self.type == 'var': newGroup = VarGroup(self.name+'+'+name)
             elif self.type == 'cut': newGroup = CutGroup(self.name+'+'+name)
             newGroup.items = added
+            newGroup.item_meta = meta
             return newGroup
         else:
             self.items[name] = item 
+            self.item_meta[name] = meta
 
     def Drop(self,name,makeCopy=False):
         '''Drop item from Group with provided name/key. Modifies in-place if copy == False.
@@ -1724,7 +1744,16 @@ def Do(self,THmethod,argsTuple=()):
         returnNone = False
         # Loop over hists
         for name,hist in self.items.items():
+            # Handle lazy axis naming. Group.Add() stores meta per item name
+            # (item_meta[name] = meta), so read this histogram's own entry; fall back to
+            # item_meta itself to keep supporting a flat {'xtitle': ...} mapping.
+            hist_meta = self.item_meta.get(name, self.item_meta)
+            if not isinstance(hist_meta, dict): hist_meta = {}
+            for axis_key, axis_getter in (('xtitle','GetXaxis'), ('ytitle','GetYaxis'), ('ztitle','GetZaxis')):
+                if axis_key in hist_meta:
+                    getattr(hist, axis_getter)().SetTitle(hist_meta[axis_key])
+
             out = getattr(hist,THmethod)(*argsTuple)
+
             # If None type, set returnNone = True
             if out == None and returnNone == False: returnNone = True
             # If return is not None, add 
@@ -1772,6 +1801,8 @@ def __init__(self,name,script,constructor=[],mainFunc='eval',columnList=None,isC
                 Defaults to None and the standard NanoAOD columns from LoadColumnNames() will be used.
         @param isClone (bool, optional): For internal use when cloning. Defaults to False. If True, will
                 not duplicate compile the same script if two functions are needed in one C++ script.
+        @param cloneFuncInfo (dict, optional): For internal use when cloning. Defaults to None. Should be the
+                _funcInfo from the object from which this one is cloned.
         '''
 
         ## @var name
@@ -2068,6 +2099,8 @@ def __init__(self,name,script,constructor=[],mainFunc='eval',corrtype=None,colum
                 Defaults to None and the standard NanoAOD columns from LoadColumnNames() will be used.
         @param isClone (bool, optional): For internal use when cloning. Defaults to False. If True, will
                 not duplicate compile the same script if two functions are needed in one C++ script.
+        @param cloneFuncInfo (dict, optional): For internal use when cloning. Defaults to None. Should be the
+                _funcInfo from the object from which this one is cloned.
         '''
 
         super(Correction,self).__init__(name,script,constructor,mainFunc,columnList,isClone,cloneFuncInfo)

diff --git a/TIMBER/CollectionOrganizer.py b/TIMBER/CollectionOrganizer.py
@@ -13,22 +13,22 @@ def __init__(self, rdf):
 
         @param rdf (RDataFrame): RDataFrame from which to organize.
         '''
-        self.baseBranches = [str(b) for b in rdf.GetColumnNames()]
-        self.generateFromRDF(rdf)
-        self.builtCollections = []
+        self._baseBranches = [str(b) for b in rdf.GetColumnNames()]
+        self._generateFromRDF(rdf)
+        self._builtCollections = []
 
-    def generateFromRDF(self, rdf):
+    def _generateFromRDF(self, rdf):
         '''Generate the collection from the RDataFrame.
 
         @param rdf (RDataFrame): RDataFrame from which to organize.
         '''
-        self.collectionDict = {}
-        self.otherBranches = {}
+        self._collectionDict = {}
+        self._otherBranches = {}
 
-        for b in self.baseBranches:
+        for b in self._baseBranches:
             self.AddBranch(b,rdf.GetColumnType(b))
 
-    def parsetype(self, t):
+    def _parsetype(self, t):
         '''Deduce the type that TIMBER needs to see for the 
         collection structs. If t is an RVec, deduce the internal type
         of the RVec.
@@ -58,8 +58,16 @@ def AddCollection(self, c):
 
         @param c (str): Collection name only.
         '''
-        if c not in self.collectionDict.keys():
-            self.collectionDict[c] = {'alias': False}
+        if c not in self._collectionDict.keys():
+            self._collectionDict[c] = {'alias': False}
+
+    def GetCollectionNames(self):
+        '''Return the names of all tracked collections.
+
+        Returns:
+            list(str): All tracked collection names (a new list, not a live view of the internal dict).
+        '''
+        # dict.keys() is a view object in Python 3; convert so the documented list is actually returned.
+        return list(self._collectionDict.keys())
 
     def GetCollectionAttributes(self, c):
         '''Get all attributes of a collection. Example, for the 'Electron'
@@ -70,7 +78,7 @@ def GetCollectionAttributes(self, c):
         Returns:
             list(str): List of attributes for the collection.
         '''
-        return [c for c in self.collectionDict[c] if c != 'alias']
+        return [c for c in self._collectionDict[c] if c != 'alias']
 
     def AddBranch(self, b, btype=''):
         '''Add a branch to track. Will deduce if it is in a collection
@@ -82,16 +90,16 @@ def AddBranch(self, b, btype=''):
         '''
         collname = b.split('_')[0]
         varname = '_'.join(b.split('_')[1:])
-        typeStr = self.parsetype(btype)
+        typeStr = self._parsetype(btype)
 
-        if typeStr == False or varname == '' or 'n'+collname not in self.baseBranches:
-            self.otherBranches[b] = {
+        if typeStr == False or varname == '' or 'n'+collname not in self._baseBranches:
+            self._otherBranches[b] = {
                 'type': typeStr,
                 'alias': False
             }
         elif varname != '':
             self.AddCollection(collname)
-            self.collectionDict[collname][varname] = {
+            self._collectionDict[collname][varname] = {
                 'type': typeStr,
                 'alias': False
             }
@@ -107,16 +115,16 @@ def Alias(self, alias, name):
             ValueError: Entries do not exist so an alias cannot be added.
         '''
         # Name is either in otherBranches, is a collection name, or is a full name <collection>_<attr>
-        if name in self.otherBranches.keys():
-            self.otherBranches[name]['alias'] = alias
-        elif name in self.collectionDict.keys():
-            self.collectionDict[name]['alias'] = alias
+        if name in self._otherBranches.keys():
+            self._otherBranches[name]['alias'] = alias
+        elif name in self._collectionDict.keys():
+            self._collectionDict[name]['alias'] = alias
         else:
             collname = name.split('_')[0]
             varname = '_'.join(name.split('_')[1:])
-            if collname in self.collectionDict.keys():
-                if varname in self.collectionDict[collname].keys():
-                    self.collectionDict[collname][varname]['alias'] = alias
+            if collname in self._collectionDict.keys():
+                if varname in self._collectionDict[collname].keys():
+                    self._collectionDict[collname][varname]['alias'] = alias
                 else:
                     raise ValueError('Cannot add alias `%s` because attribute `%s` does not exist in collection `%s`'%(alias,varname,collname))
             else:
@@ -139,10 +147,10 @@ def BuildCppCollection(self,collection,node,silent=True):
         newNode = node
         attributes = []
         for aname in self.GetCollectionAttributes(collection):
-            attributes.append('%s %s'%(self.collectionDict[collection][aname]['type'], aname))
+            attributes.append('%s %s'%(self._collectionDict[collection][aname]['type'], aname))
 
-        if collection+'s' not in self.builtCollections:
-            self.builtCollections.append(collection+'s')
+        if collection+'s' not in self._builtCollections:
+            self._builtCollections.append(collection+'s')
             CompileCpp(StructDef(collection,attributes))
             newNode = newNode.Define(collection+'s', StructObj(collection,attributes),silent=silent)
         else:
@@ -162,8 +170,8 @@ def CollectionDefCheck(self, action_str, node):
             Node: Manipulated node with the C++ struct built (the action string is not applied though).
         '''
         newNode = node
-        for c in self.collectionDict.keys():
-            if re.search(r"\b" + re.escape(c+'s') + r"\b", action_str) and (c+'s' not in self.builtCollections):
+        for c in self._collectionDict.keys():
+            if re.search(r"\b" + re.escape(c+'s') + r"\b", action_str) and (c+'s' not in self._builtCollections):
                 print ('MAKING %ss for %s'%(c,action_str))
                 newNode = self.BuildCppCollection(c,newNode,silent=True)
         return newNode
@@ -214,7 +222,6 @@ def StructObj(collectionName, varList):
 return {0}s;
 '''
     attr_assignment_str = ''
-    print (varList)
     for i,v in enumerate(varList):
         varname = v.split(' ')[-1]
         attr_assignment_str += '{0}_{1}[i],'.format(collectionName, varname)

diff --git a/TIMBER/Framework/include/DeepAK8_helper.h b/TIMBER/Framework/include/DeepAK8_helper.h
@@ -0,0 +1,40 @@
+#ifndef _TIMBER_DEEPAK8_HELPER
+#define _TIMBER_DEEPAK8_HELPER
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+/**
+ * @brief C++ class to access scale factors associated with DeepAK8 tagging.
+ */
+class DeepAK8_helper {
+    private:
+        std::string entry_to_find;
+        std::vector<std::vector<float> > _values;
+        // Location of the scale factor CSV, built from the TIMBERPATH environment variable.
+        // std::getenv returns a null pointer when the variable is unset and constructing a
+        // std::string from a null pointer is undefined behavior, so fail with a clear message.
+        // NOTE(review): the file name is appended without a separator, so TIMBERPATH is
+        // presumably expected to end in '/' - confirm against the environment setup.
+        std::string _p = []() {
+            const char* timberPath = std::getenv("TIMBERPATH");
+            if (timberPath == nullptr) {
+                throw std::runtime_error("DeepAK8_helper: environment variable TIMBERPATH is not set.");
+            }
+            return std::string(timberPath)+"TIMBER/data/OfficialSFs/DeepAK8V2_Top_W_SFs.csv";
+        }();
+
+    public:
+        /**
+         * @brief Construct a new DeepAK8_helper object
+         * 
+         * @param particle Either "Top" or "W"
+         * @param year Year of the scale factors to use. NOTE(review): accepted format is not visible in this header - confirm against the implementation.
+         * @param workingpoint Ex. "0p5"
+         * @param massDecorr Presumably selects the mass-decorrelated tagger scale factors - confirm against the implementation.
+         */
+        DeepAK8_helper(std::string particle, int year, std::string workingpoint, bool massDecorr);
+        ~DeepAK8_helper(){};
+        /**
+         * @brief Lookup the scale factor and variations based on the AK8 jet momentum.
+         * Returned values are absolute {nominal, up, down}.
+         * 
+         * @param pt AK8 jet momentum used for the lookup.
+         * @return std::vector<float> {nominal, up, down} (absolute)
+         */
+        std::vector<float> eval(float pt);
+};
+#endif
diff --git a/TIMBER/Framework/include/GenMatching.h b/TIMBER/Framework/include/GenMatching.h
@@ -75,7 +75,17 @@ class Particle {
          * @param idx Child index
          */
         void AddChild(int idx);
+        /**
+         * @brief Return the index of the parent.
+         * 
+         * @return int 
+         */
         int GetParent();
+        /**
+         * @brief Get vector of indices of the children.
+         * 
+         * @return std::vector<int> 
+         */
         std::vector<int> GetChild();
         /**
          * @brief Calculate \f$\Delta R\f$ between current particle and input vector.
@@ -123,13 +133,26 @@ class GenParticleTree
         Particle _noneParticle;
 
     public:
+        /**
+         * @brief Construct a new GenParticleTree object
+         * 
+         * @param nParticles 
+         */
         GenParticleTree(int nParticles);
         /**
          * @brief Add particle to tree.
          * 
          * @param particle 
          */
         Particle* AddParticle(Particle particle);
+        /**
+         * @brief Add particle to tree.
+         * 
+         * @tparam T GenPartStruct
+         * @param index Index of the particle in the GenPart collection
+         * @param p A GenPartStruct from TIMBER
+         * @return Particle* 
+         */
         template <class T>
         Particle* AddParticle(int index, T p) {
             Particle particle(index, p);