ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery_DBS2.py
Revision: 1.9
Committed: Wed Jul 4 00:50:09 2007 UTC (17 years, 10 months ago) by gutsche
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_0_2_pre5, CRAB_2_0_2_pre4, CRAB_2_0_2_pre3, CRAB_1_5_4_SLC3, CRAB_1_5_4_SLC3_pre4, CRAB_2_0_2_pre2, CRAB_2_0_2_pre1, CRAB_1_5_4_SLC3_pre3, CRAB_2_0_1, CRAB_1_5_4_SLC3_pre2, CRAB_2_0_1_pre1, CRAB_1_5_4_SLC3_pre1, CRAB_2_0_0, CRAB_2_0_0_pre10, CRAB_2_0_0_pre9, CRAB_1_5_4, CRAB_1_5_4_pre2, CRAB_1_5_4_pre1, CRAB_2_0_0_pre7, CRAB_2_0_0_pre6, CRAB_1_5_3, CRAB_1_5_3_pre5, CRAB_1_5_3_pre4, CRAB_2_0_0_pre5, CRAB_1_5_3_pre3, configure, CRAB_2_0_0_pre4
Branch point for: CRAB_1_5_4_SLC3_pre4_br, CRAB_1_5_4_SLC3_start
Changes since 1.8: +41 -20 lines
Log Message:
Introduced run selection within single PDS

crab.cfg card:

runselection = 1,2,3-4

Parsing is done on the UI; DBS has no suitable API to do the parsing on the DBS server (yet).
api.listFiles is used with details=True, which increases query time significantly.

In addition, .dat files, which can be present in a PDS mixed with .root files, are filtered out.

File Contents

# User Rev Content
1 gutsche 1.1 #!/usr/bin/env python
2     import exceptions
3 gutsche 1.2 import DBSAPI.dbsApi
4     from DBSAPI.dbsApiException import *
5 gutsche 1.1 import common
6 gutsche 1.9 from crab_util import *
7 gutsche 1.1
8    
9     # #######################################
class DBSError_DBS2(Exception):
    """
    Generic DBS error.

    The error name and error message are merged into one formatted text,
    which becomes the only element of the exception's ``args``.
    """

    def __init__(self, errorName, errorMessage):
        """
        errorName    -- short name of the DBS error
        errorMessage -- descriptive text of the DBS error
        """
        text = '\nERROR DBS %s : %s \n' % (errorName, errorMessage)
        Exception.__init__(self, text)

    def getErrorMessage(self):
        """ Return the formatted error text """
        return "%s" % self.args
19    
20     # #######################################
class DBSInvalidDataTierError_DBS2(Exception):
    """
    DBS error for an invalid data tier (going by the class name).

    The error name and error message are merged into one formatted text,
    which becomes the only element of the exception's ``args``.
    """

    def __init__(self, errorName, errorMessage):
        """
        errorName    -- short name of the DBS error
        errorMessage -- descriptive text of the DBS error
        """
        text = '\nERROR DBS %s : %s \n' % (errorName, errorMessage)
        Exception.__init__(self, text)

    def getErrorMessage(self):
        """ Return the formatted error text """
        return "%s" % self.args
30    
31     # #######################################
class DBSInfoError_DBS2:
    """
    Reports a failure to access a DBS url by printing to standard output.

    This is a plain class, not an exception: instantiating it only prints.
    """

    def __init__(self, url):
        """
        url -- the DBS url that could not be accessed (string)
        """
        report = '\nERROR accessing DBS url : ' + url + '\n'
        # A single parenthesised expression prints the same text whether
        # print is a statement or a function.
        print(report)
36    
37     # ####################################
class DataDiscoveryError_DBS2(Exception):
    """
    Data discovery failure.

    Carries one payload, the error message, as the single element of
    ``args``.  The payload may be a string or another exception instance.
    """

    def __init__(self, errorMessage):
        """
        errorMessage -- text (or exception object) describing the failure
        """
        # Pass the payload straight to the base class.  Pre-assigning it to
        # self.args goes through the args setter on Python >= 2.5, which
        # converts a string into a tuple of single characters and garbles
        # the message (and fails outright for a non-iterable payload).
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        return "%s" % (self.args[0],)
47    
48     # ####################################
class NotExistingDatasetError_DBS2(Exception):
    """
    No data was found for the requested dataset.

    Carries one payload, the error message, as the single element of
    ``args``.
    """

    def __init__(self, errorMessage):
        """
        errorMessage -- text describing which dataset could not be found
        """
        # Pass the payload straight to the base class.  Pre-assigning it to
        # self.args goes through the args setter on Python >= 2.5, which
        # converts a string into a tuple of single characters and garbles
        # the message.
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        return "%s" % (self.args[0],)
58    
59     # ####################################
class NoDataTierinProvenanceError_DBS2(Exception):
    """
    Error for a data tier missing in the provenance (going by the class
    name; nothing in this module raises it).

    Carries one payload, the error message, as the single element of
    ``args``.
    """

    def __init__(self, errorMessage):
        """
        errorMessage -- text describing the failure
        """
        # Pass the payload straight to the base class.  Pre-assigning it to
        # self.args goes through the args setter on Python >= 2.5, which
        # converts a string into a tuple of single characters and garbles
        # the message.
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        return "%s" % (self.args[0],)
69    
70     # ####################################
71     # class to find and extract info from published data
class DataDiscovery_DBS2:
    """
    Find and extract information about a published dataset from DBS.

    After fetchDBSInfo() has run, the getters expose:
      * events per fileblock,
      * events per logical file,
      * logical files grouped by fileblock,
      * the total number of events.
    """

    def __init__(self, datasetPath, cfg_params):
        """
        Store the inputs and create empty result containers; DBS is not
        contacted here.

        datasetPath -- dataset path handed to the DBS API
        cfg_params  -- mapping of configuration cards; fetchDBSInfo() reads
                       'CMSSW.dbs_url' and 'CMSSW.runselection' from it
        """
        # Attributes
        self.datasetPath = datasetPath
        self.cfg_params = cfg_params

        self.eventsPerBlock = {}  # DBS output: fileblock name -> events
        self.eventsPerFile = {}   # DBS output: logical file name -> events
        self.blocksinfo = {}      # DBS output: fileblock name -> file names
        self.maxEvents = 0        # DBS output: total number of events

    # ####################################
    def fetchDBSInfo(self):
        """
        Contact DBS and fill the per-block / per-file bookkeeping.

        The DBS url comes from 'CMSSW.dbs_url' (a production default is used
        when the card is absent).  When 'CMSSW.runselection' is present, the
        detailed file listing is requested and only files containing at
        least one selected run are kept.

        Raises:
          DataDiscoveryError_DBS2      -- the DBS file query failed
          NotExistingDatasetError_DBS2 -- no file was accepted for the dataset
        """
        # sys.exc_info() gives access to the active exception without the
        # version-specific "except X, e" / "except X as e" spellings.
        import sys

        ## get DBS URL
        try:
            dbs_url = self.cfg_params['CMSSW.dbs_url']
        except KeyError:
            dbs_url = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"

        common.logger.debug(3, "Accessing DBS at: " + dbs_url)

        ## check if runs are selected
        try:
            runselection = parseRange2(self.cfg_params['CMSSW.runselection'])
        except KeyError:
            # card not present: no run selection requested
            runselection = []
        except Exception:
            # Keep the best-effort behaviour (fall back to "no selection")
            # but leave a trace instead of swallowing the problem silently.
            common.logger.debug(3, "Ignoring CMSSW.runselection, it could not be parsed: %s"
                                % (sys.exc_info()[1],))
            runselection = []

        ## service API
        args = {}
        args['url'] = dbs_url
        args['level'] = 'CRITICAL'

        api = DBSAPI.dbsApi.DbsApi(args)
        try:
            if len(runselection) <= 0:
                files = api.listDatasetFiles(self.datasetPath)
            else:
                # run numbers are read from each record's 'RunsList', so the
                # detailed listing is requested here
                files = api.listFiles(path=self.datasetPath, details=True)
        except (DbsBadRequest, DBSError_DBS2):
            raise DataDiscoveryError_DBS2(sys.exc_info()[1])

        # parse files and fill arrays
        self._collectFiles(files, runselection)

        for block, nevts in self.eventsPerBlock.items():
            common.logger.debug(6, "DBSInfo: total nevts %i in block %s " % (nevts, block))

        if len(self.eventsPerBlock) <= 0:
            raise NotExistingDatasetError_DBS2(("\nNo data for %s in DBS\nPlease check"
                                                + " dataset path variables in crab.cfg")
                                               % self.datasetPath)

    # ####################################
    def _collectFiles(self, files, runselection):
        """
        Fold DBS file records into the result containers.

        files        -- iterable of mappings providing 'LogicalFileName',
                        'Block' (itself providing 'Name'), 'NumberOfEvents'
                        and, when a run selection is given, 'RunsList'
        runselection -- list of selected run numbers; empty means "keep all"
        """
        for record in files:
            filename = record['LogicalFileName']
            # skip .dat files, which can be mixed with .root files
            if filename.find('.dat') >= 0:
                continue

            fileblock = record['Block']['Name']
            events = record['NumberOfEvents']

            if len(runselection) > 0 and not self._hasSelectedRun(record, runselection):
                continue

            # number of events per block
            self.eventsPerBlock[fileblock] = self.eventsPerBlock.get(fileblock, 0) + events

            # number of events per file
            self.eventsPerFile[filename] = events

            # files per block
            self.blocksinfo.setdefault(fileblock, []).append(filename)

            # total number of events
            self.maxEvents += events

    # ####################################
    def _hasSelectedRun(self, record, runselection):
        """
        Return True when any run of the file record is in runselection.
        """
        for run in record['RunsList']:
            if run['RunNumber'] in runselection:
                return True
        return False

    # #################################################
    def getMaxEvents(self):
        """
        max events
        """
        return self.maxEvents

    # #################################################
    def getEventsPerBlock(self):
        """
        list the event collections structure by fileblock
        """
        return self.eventsPerBlock

    # #################################################
    def getEventsPerFile(self):
        """
        list the event collections structure by file
        """
        return self.eventsPerFile

    # #################################################
    def getFiles(self):
        """
        return files grouped by fileblock
        """
        return self.blocksinfo
192    
193     ########################################################################