ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery.py
(Generate patch)

Comparing COMP/CRAB/python/DataDiscovery.py (file contents):
Revision 1.37.2.1 by ewv, Thu Jan 21 16:13:28 2010 UTC vs.
Revision 1.46 by spiga, Wed Jun 2 13:55:14 2010 UTC

# Line 8 | Line 8 | import DBSAPI.dbsApi
8   from DBSAPI.dbsApiException import *
9   import common
10   from crab_util import *
11 + from LumiList import LumiList
12   import os
13  
14  
# Line 88 | Line 89 | class DataDiscovery:
89          #       Attributes
90          self.datasetPath = datasetPath
91          # Analysis dataset is primary/processed/tier/definition
92 <        self.ads = len(self.datasetPath.split("/")) > 4 or len(self.datasetPath.split("/")) == 1
92 >        self.ads = len(self.datasetPath.split("/")) > 4
93          self.cfg_params = cfg_params
94          self.skipBlocks = skipAnBlocks
95  
# Line 101 | Line 102 | class DataDiscovery:
102          self.maxLumis = 0         # DBS output: total number of lumis
103          self.parent = {}          # DBS output: parents of each file
104          self.lumis = {}           # DBS output: lumis in each file
105 <
105 >        self.lumiMask = None
106 >        self.splitByLumi = False
107  
108      def fetchDBSInfo(self):
109          """
# Line 109 | Line 111 | class DataDiscovery:
111          """
112          ## get DBS URL
113          global_url="http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
114 <        caf_url = "http://cmsdbsprod.cern.ch/cms_dbs_caf_analysis_01/servlet/DBSServlet"
113 <        dbs_url_map  =   {'glite':    global_url,
114 <                          'glite_slc5':global_url,\
115 <                          'glitecoll':global_url,\
116 <                          'condor':   global_url,\
117 <                          'condor_g': global_url,\
118 <                          'glidein':  global_url,\
119 <                          'lsf':      global_url,\
120 <                          'caf':      caf_url,\
121 <                          'sge':      global_url,\
122 <                          'arc':      global_url,\
123 <                          'pbs':      global_url
124 <                          }
125 <
126 <        dbs_url_default = dbs_url_map[(common.scheduler.name()).lower()]
127 <        dbs_url=  self.cfg_params.get('CMSSW.dbs_url', dbs_url_default)
114 >        dbs_url=  self.cfg_params.get('CMSSW.dbs_url', global_url)
115          common.logger.info("Accessing DBS at: "+dbs_url)
116  
117          ## check if runs are selected
# Line 132 | Line 119 | class DataDiscovery:
119          if (self.cfg_params.has_key('CMSSW.runselection')):
120              runselection = parseRange2(self.cfg_params['CMSSW.runselection'])
121  
122 +        ## check if various lumi parameters are set
123 +        self.lumiMask = self.cfg_params.get('CMSSW.lumi_mask',None)
124 +        self.lumiParams = self.cfg_params.get('CMSSW.total_number_of_lumis',None) or \
125 +                          self.cfg_params.get('CMSSW.lumis_per_job',None)
126 +
127 +        lumiList = None
128 +        if self.lumiMask:
129 +            lumiList = LumiList(filename=self.lumiMask)
130 +        if runselection:
131 +            runList = LumiList(runs = runselection)
132  
133          self.splitByRun = int(self.cfg_params.get('CMSSW.split_by_run', 0))
137
134          common.logger.log(10-1,"runselection is: %s"%runselection)
135 +
136 +        if not self.splitByRun:
137 +            self.splitByLumi = self.lumiMask or self.lumiParams or self.ads
138 +
139          ## service API
140          args = {}
141          args['url']     = dbs_url
142          args['level']   = 'CRITICAL'
143        args['adshome']   = '$HOME/DBSADS'
143  
144          ## check whether use of the parent info has been requested
145          useparent = int(self.cfg_params.get('CMSSW.use_parent',0))
# Line 152 | Line 151 | class DataDiscovery:
151          api = DBSAPI.dbsApi.DbsApi(args)
152          self.files = self.queryDbs(api,path=self.datasetPath,runselection=runselection,useParent=useparent)
153  
154 +        # Check the data type of the primary dataset
155 +        pdsName = self.datasetPath.split("/")[1]
156 +        primDSs = api.listPrimaryDatasets(pdsName)
157 +        dataType = primDSs[0]['Type']
158 +        common.logger.debug("Datatype is %s" % dataType)
159 +        if dataType == 'data' and not (self.splitByRun or self.splitByLumi):
160 +            msg = 'Data must be split by lumi or by run. ' \
161 +                  'Please see crab -help for the correct settings'
162 +            raise  CrabException(msg)
163 +
164 +
165 +
166          anFileBlocks = []
167          if self.skipBlocks: anFileBlocks = readTXTfile(self, fileBlocks_FileName)
168  
169          # parse files and fill arrays
170          for file in self.files :
171              parList  = []
172 <            lumiList = [] # List of tuples
172 >            fileLumis = [] # List of tuples
173              # skip already analyzed blocks
174              fileblock = file['Block']['Name']
175              if fileblock not in anFileBlocks :
# Line 166 | Line 177 | class DataDiscovery:
177                  # if requested, retrieve the list of parents for the given child
178                  if useparent==1:
179                      parList = [x['LogicalFileName'] for x in file['ParentList']]
180 <                if self.ads:
181 <                    lumiList = [ (x['RunNumber'], x['LumiSectionNumber'])
180 >                if self.splitByLumi:
181 >                    fileLumis = [ (x['RunNumber'], x['LumiSectionNumber'])
182                                   for x in file['LumiList'] ]
183                  self.parent[filename] = parList
184 <                self.lumis[filename] = lumiList
184 >                # With both a lumi mask and a run selection, keep the intersection of the two lists.
185 >                if self.lumiMask and runselection:
186 >                    self.lumis[filename] = runList.filterLumis(lumiList.filterLumis(fileLumis))
187 >                elif runselection:
188 >                    self.lumis[filename] = runList.filterLumis(fileLumis)
189 >                elif self.lumiMask:
190 >                    self.lumis[filename] = lumiList.filterLumis(fileLumis)
191 >                else:
192 >                    self.lumis[filename] = fileLumis
193                  if filename.find('.dat') < 0 :
194                      events    = file['NumberOfEvents']
195                      # Count number of events and lumis per block
# Line 189 | Line 208 | class DataDiscovery:
208  
209                      # total number of events
210                      self.maxEvents += events
211 <                    self.maxLumis  += len(lumiList)
211 >                    self.maxLumis  += len(self.lumis[filename])
212  
213          if  self.skipBlocks and len(self.eventsPerBlock.keys()) == 0:
214              msg = "No new fileblocks available for dataset: "+str(self.datasetPath)
215              raise  CrabException(msg)
216  
198        saveFblocks=''
199        for block in self.eventsPerBlock.keys() :
200            saveFblocks += str(block)+'\n'
201            common.logger.log(10-1,"DBSInfo: total nevts %i in block %s "%(self.eventsPerBlock[block],block))
202        writeTXTfile(self, fileBlocks_FileName , saveFblocks)
217  
218          if len(self.eventsPerBlock) <= 0:
219              raise NotExistingDatasetError(("\nNo data for %s in DBS\nPlease check"
# Line 210 | Line 224 | class DataDiscovery:
224      def queryDbs(self,api,path=None,runselection=None,useParent=None):
225  
226          allowedRetriveValue = ['retrive_block', 'retrive_run']
227 <        if self.ads: allowedRetriveValue.append('retrive_lumi')
227 >        if self.ads or self.lumiMask or self.lumiParams:
228 >            allowedRetriveValue.append('retrive_lumi')
229          if useParent == 1: allowedRetriveValue.append('retrive_parent')
230          common.logger.debug("Set of input parameters used for DBS query: %s" % allowedRetriveValue)
231          try:
232 <            if len(runselection) <=0 :
233 <                if useParent==1 or self.splitByRun==1 or self.ads:
232 >            if len(runselection) <=0 or self.splitByLumi:
233 >                if useParent==1 or self.splitByRun==1 or self.splitByLumi:
234                      if self.ads:
235                          files = api.listFiles(analysisDataset=path, retriveList=allowedRetriveValue)
236                      else :

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines