ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery.py
Revision: 1.47
Committed: Tue Jun 29 17:46:42 2010 UTC (14 years, 10 months ago) by ewv
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_7_4_pre1
Changes since 1.46: +7 -3 lines
Log Message:
Grab LumiList from CMSSW libraries for CMSSW 3.8 and higher

File Contents

# User Rev Content
1 gutsche 1.6 #!/usr/bin/env python
2 ewv 1.33
3 ewv 1.47 __revision__ = "$Id: DataDiscovery.py,v 1.46 2010/06/02 13:55:14 spiga Exp $"
4     __version__ = "$Revision: 1.46 $"
5 ewv 1.33
6 slacapra 1.18 import exceptions
7     import DBSAPI.dbsApi
8 ewv 1.32 from DBSAPI.dbsApiException import *
9 slacapra 1.18 import common
10     from crab_util import *
11 ewv 1.47 try: # Can remove when CMSSW 3.7 and earlier are dropped
12     from FWCore.PythonUtilities.LumiList import LumiList
13     except ImportError:
14     from LumiList import LumiList
15    
16 ewv 1.32 import os
17    
18 afanfani 1.1
19 afanfani 1.3
class DBSError(Exception):
    """Error reported while talking to DBS.

    The exception carries a single pre-formatted message built from the
    error name and the error text supplied by the caller.
    """

    def __init__(self, errorName, errorMessage):
        """Format errorName and errorMessage into the exception message."""
        # The builtin Exception is the very same class as exceptions.Exception
        # on Python 2, and does not depend on the py2-only `exceptions` module.
        message = '\nERROR DBS %s : %s \n' % (errorName, errorMessage)
        Exception.__init__(self, message)

    def getErrorMessage(self):
        """ Return error message """
        return "%s" % (self.args[0],)
29    
30 ewv 1.32
31    
class DBSInvalidDataTierError(Exception):
    """Error raised for an invalid data tier in a DBS request.

    The exception carries a single pre-formatted message built from the
    error name and the error text supplied by the caller.
    """

    def __init__(self, errorName, errorMessage):
        """Format errorName and errorMessage into the exception message."""
        # The builtin Exception is the very same class as exceptions.Exception
        # on Python 2, and does not depend on the py2-only `exceptions` module.
        message = '\nERROR DBS %s : %s \n' % (errorName, errorMessage)
        Exception.__init__(self, message)

    def getErrorMessage(self):
        """ Return error message """
        return "%s" % (self.args[0],)
41    
42 ewv 1.32
43    
class DBSInfoError:
    """Reports a DBS access problem on standard output when constructed.

    NOTE(review): despite the name this is not an exception class (it has no
    Exception base), so it cannot be raised/caught as one -- confirm intent.
    """

    def __init__(self, url):
        """Print the URL that could not be accessed."""
        report = '\nERROR accessing DBS url : ' + url + '\n'
        # A parenthesised single argument prints identically with the
        # Python 2 print statement and the print() function.
        print(report)
48    
49 ewv 1.32
50    
class DataDiscoveryError(Exception):
    """Generic failure while discovering data in DBS."""

    def __init__(self, errorMessage):
        """Store errorMessage as the single exception argument."""
        # Do NOT assign `self.args = errorMessage`: the args attribute of
        # BaseException coerces its value to a tuple, which splits a string
        # message into individual characters and garbles getErrorMessage().
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        return "%s" % (self.args[0],)
60 afanfani 1.3
61 ewv 1.32
62    
class NotExistingDatasetError(Exception):
    """Raised when no data is found in DBS for the requested dataset."""

    def __init__(self, errorMessage):
        """Store errorMessage as the single exception argument."""
        # Do NOT assign `self.args = errorMessage`: the args attribute of
        # BaseException coerces its value to a tuple, which splits a string
        # message into individual characters and garbles getErrorMessage().
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        return "%s" % (self.args[0],)
72 afanfani 1.1
73 ewv 1.32
74    
class NoDataTierinProvenanceError(Exception):
    """Raised when a data tier is missing from a dataset's provenance."""

    def __init__(self, errorMessage):
        """Store errorMessage as the single exception argument."""
        # Do NOT assign `self.args = errorMessage`: the args attribute of
        # BaseException coerces its value to a tuple, which splits a string
        # message into individual characters and garbles getErrorMessage().
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        return "%s" % (self.args[0],)
84 afanfani 1.1
85 ewv 1.32
86    
class DataDiscovery:
    """
    Class to find and extract info from published data
    """

    def __init__(self, datasetPath, cfg_params, skipAnBlocks):
        """
        Store the request parameters and reset all DBS result containers.

        datasetPath  -- dataset path string, split on "/" to classify it
        cfg_params   -- mapping of configuration keys ('CMSSW.*') to values
        skipAnBlocks -- when true, fileblocks listed in the analyzed-blocks
                        file are skipped by fetchDBSInfo()
        """
        # Attributes
        self.datasetPath = datasetPath
        # Analysis dataset is primary/processed/tier/definition
        self.ads = len(self.datasetPath.split("/")) > 4
        self.cfg_params = cfg_params
        self.skipBlocks = skipAnBlocks

        self.eventsPerBlock = {}  # DBS output: map fileblocks-events for collection
        self.eventsPerFile = {}   # DBS output: map files-events
        self.blocksinfo = {}      # DBS output: map fileblocks-files
        self.maxEvents = 0        # DBS output: max events
        self.maxLumis = 0         # DBS output: total number of lumis
        self.parent = {}          # DBS output: parents of each file
        self.lumis = {}           # DBS output: lumis in each file
        self.lumiMask = None
        self.splitByLumi = False
        # The attributes below are (re)assigned by fetchDBSInfo(); they are
        # initialised here so that queryDbs() and getListFiles() do not fail
        # with AttributeError when used before fetchDBSInfo() has run.
        self.lumiParams = None
        self.splitByRun = 0
        self.files = []

    def fetchDBSInfo(self):
        """
        Contact DBS

        Reads the 'CMSSW.*' settings from cfg_params, queries DBS through
        queryDbs() and fills the per-block / per-file maps exposed by the
        getters of this class.

        Raises CrabException when a dataset of type 'data' is split neither
        by run nor by lumi, or when block skipping is on and nothing new is
        left; raises NotExistingDatasetError when no events were collected.
        """
        ## get DBS URL
        global_url = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
        dbs_url = self.cfg_params.get('CMSSW.dbs_url', global_url)
        common.logger.info("Accessing DBS at: "+dbs_url)

        ## check if runs are selected
        runselection = []
        if 'CMSSW.runselection' in self.cfg_params:
            runselection = parseRange2(self.cfg_params['CMSSW.runselection'])

        ## check if various lumi parameters are set
        self.lumiMask = self.cfg_params.get('CMSSW.lumi_mask', None)
        self.lumiParams = self.cfg_params.get('CMSSW.total_number_of_lumis', None) or \
                          self.cfg_params.get('CMSSW.lumis_per_job', None)

        lumiList = None
        if self.lumiMask:
            lumiList = LumiList(filename=self.lumiMask)
        runList = None
        if runselection:
            runList = LumiList(runs=runselection)

        self.splitByRun = int(self.cfg_params.get('CMSSW.split_by_run', 0))
        # Level 9 is one below logging.DEBUG (10).
        common.logger.log(10-1, "runselection is: %s" % runselection)

        # When splitting by run, splitByLumi keeps its previous value.
        if not self.splitByRun:
            self.splitByLumi = self.lumiMask or self.lumiParams or self.ads

        ## service API
        args = {'url': dbs_url, 'level': 'CRITICAL'}

        ## check if has been requested to use the parent info
        useparent = int(self.cfg_params.get('CMSSW.use_parent', 0))

        ## check if has been asked for a non default file to store/read analyzed fileBlocks
        defaultName = common.work_space.shareDir()+'AnalyzedBlocks.txt'
        fileBlocks_FileName = os.path.abspath(
            self.cfg_params.get('CMSSW.fileblocks_file', defaultName))

        api = DBSAPI.dbsApi.DbsApi(args)
        self.files = self.queryDbs(api, path=self.datasetPath,
                                   runselection=runselection, useParent=useparent)

        # Check to see what the dataset is
        pdsName = self.datasetPath.split("/")[1]
        primDSs = api.listPrimaryDatasets(pdsName)
        if primDSs:
            dataType = primDSs[0]['Type']
        else:
            # No primary dataset returned: leave the type unknown instead of
            # failing with IndexError, so the "no data" check at the end of
            # this method can report the problem with a meaningful error.
            dataType = None
        common.logger.debug("Datatype is %s" % dataType)
        if dataType == 'data' and not (self.splitByRun or self.splitByLumi):
            msg = 'Data must be split by lumi or by run. ' \
                  'Please see crab -help for the correct settings'
            raise CrabException(msg)

        anFileBlocks = []
        if self.skipBlocks:
            anFileBlocks = readTXTfile(self, fileBlocks_FileName)

        # parse files and fill arrays
        for fileInfo in self.files:
            fileblock = fileInfo['Block']['Name']
            # skip already analyzed blocks
            if fileblock in anFileBlocks:
                continue
            filename = fileInfo['LogicalFileName']

            # asked retry the list of parent for the given child
            parList = []
            if useparent == 1:
                parList = [x['LogicalFileName'] for x in fileInfo['ParentList']]
            self.parent[filename] = parList

            fileLumis = []  # List of (run, lumi section) tuples
            if self.splitByLumi:
                fileLumis = [(x['RunNumber'], x['LumiSectionNumber'])
                             for x in fileInfo['LumiList']]
            # Apply the lumi mask first and the run selection second; with
            # both set this is the intersection of the two selections.
            if self.lumiMask:
                fileLumis = lumiList.filterLumis(fileLumis)
            if runselection:
                fileLumis = runList.filterLumis(fileLumis)
            self.lumis[filename] = fileLumis

            # Files whose name contains '.dat' are not counted.
            if filename.find('.dat') >= 0:
                continue
            events = fileInfo['NumberOfEvents']

            # Count number of events per block
            self.eventsPerBlock[fileblock] = \
                self.eventsPerBlock.get(fileblock, 0) + events
            # Number of events per file
            self.eventsPerFile[filename] = events
            # List of files per block
            self.blocksinfo.setdefault(fileblock, []).append(filename)

            # total number of events and lumis
            self.maxEvents += events
            self.maxLumis += len(fileLumis)

        if not self.eventsPerBlock:
            if self.skipBlocks:
                msg = "No new fileblocks available for dataset: "+str(self.datasetPath)
                raise CrabException(msg)
            raise NotExistingDatasetError(("\nNo data for %s in DBS\nPlease check"
                                           + " dataset path variables in crab.cfg")
                                          % self.datasetPath)

    def queryDbs(self, api, path=None, runselection=None, useParent=None):
        """
        Ask DBS for the files of the dataset and return them as a list.

        api          -- DBS API object providing listFiles/listDatasetFiles
        path         -- dataset (or analysis dataset) path passed to listFiles
        runselection -- runs to query one by one; None or empty means no
                        per-run querying
        useParent    -- 1 to also retrieve parent information

        A failure while querying a single run is logged and that run is
        skipped. DbsBadRequest and DBSError are re-raised as
        DataDiscoveryError.
        """
        import sys

        # The declared default is None; treat it like "no runs selected"
        # instead of failing on len(None).
        if runselection is None:
            runselection = []

        allowedRetriveValue = ['retrive_block', 'retrive_run']
        if self.ads or self.lumiMask or self.lumiParams:
            allowedRetriveValue.append('retrive_lumi')
        if useParent == 1:
            allowedRetriveValue.append('retrive_parent')
        common.logger.debug("Set of input parameters used for DBS query: %s" % allowedRetriveValue)

        try:
            if len(runselection) <= 0 or self.splitByLumi:
                if useParent == 1 or self.splitByRun == 1 or self.splitByLumi:
                    if self.ads:
                        files = api.listFiles(analysisDataset=path,
                                              retriveList=allowedRetriveValue)
                    else:
                        files = api.listFiles(path=path,
                                              retriveList=allowedRetriveValue)
                else:
                    # NOTE(review): this branch uses self.datasetPath rather
                    # than the `path` argument -- confirm this is intended.
                    files = api.listDatasetFiles(self.datasetPath)
            else:
                files = []
                for arun in runselection:
                    try:
                        if self.ads:
                            filesinrun = api.listFiles(analysisDataset=path,
                                                       retriveList=allowedRetriveValue,
                                                       runNumber=arun)
                        else:
                            filesinrun = api.listFiles(path=path,
                                                       retriveList=allowedRetriveValue,
                                                       runNumber=arun)
                        files.extend(filesinrun)
                    except Exception:
                        # Best effort: a failing run is reported and skipped.
                        msg = "WARNING: problem extracting info from DBS for run %s " % arun
                        common.logger.info(msg)
                        common.logger.debug("DBS failure detail: %s" % (sys.exc_info()[1],))
        except (DbsBadRequest, DBSError):
            # sys.exc_info() instead of "except X, e" / "except X as e" keeps
            # this block valid on every Python version.
            raise DataDiscoveryError(sys.exc_info()[1])

        return files

    def getMaxEvents(self):
        """
        max events
        """
        return self.maxEvents

    def getMaxLumis(self):
        """
        Return the number of lumis in the dataset
        """
        return self.maxLumis

    def getEventsPerBlock(self):
        """
        list the event collections structure by fileblock
        """
        return self.eventsPerBlock

    def getEventsPerFile(self):
        """
        list the event collections structure by file
        """
        return self.eventsPerFile

    def getFiles(self):
        """
        return files grouped by fileblock
        """
        return self.blocksinfo

    def getParent(self):
        """
        return parent grouped by file
        """
        return self.parent

    def getLumis(self):
        """
        return lumi sections grouped by file
        """
        return self.lumis

    def getListFiles(self):
        """
        return the file list obtained from the DBS query
        """
        return self.files