3 |
|
import time, glob |
4 |
|
from Actor import * |
5 |
|
from crab_util import * |
6 |
– |
from crab_logger import Logger |
6 |
|
from crab_exceptions import * |
7 |
|
from ProdCommon.FwkJobRep.ReportParser import readJobReport |
8 |
+ |
from ProdCommon.FwkJobRep.ReportState import checkSuccess |
9 |
|
from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec |
10 |
|
from ProdCommon.DataMgmt.DBS.DBSWriter import DBSWriter |
11 |
|
from ProdCommon.DataMgmt.DBS.DBSErrors import DBSWriterError, formatEx,DBSReaderError |
12 |
|
from ProdCommon.DataMgmt.DBS.DBSReader import DBSReader |
13 |
|
from ProdCommon.DataMgmt.DBS.DBSWriter import DBSWriter,DBSWriterObjects |
14 |
|
import sys |
15 |
+ |
from DBSAPI.dbsApiException import DbsException |
16 |
+ |
from DBSAPI.dbsApi import DbsApi |
17 |
|
|
18 |
|
class Publisher(Actor): |
19 |
|
def __init__(self, cfg_params): |
25 |
|
- publishes output data on DBS and DLS |
26 |
|
""" |
27 |
|
|
28 |
< |
try: |
29 |
< |
userprocessedData = cfg_params['USER.publish_data_name'] |
30 |
< |
self.processedData = None |
29 |
< |
except KeyError: |
28 |
> |
self.cfg_params=cfg_params |
29 |
> |
|
30 |
> |
if not cfg_params.has_key('USER.publish_data_name'): |
31 |
|
raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file') |
32 |
+ |
self.userprocessedData = cfg_params['USER.publish_data_name'] |
33 |
+ |
self.processedData = None |
34 |
|
|
35 |
< |
try: |
36 |
< |
if (int(cfg_params['USER.copy_data']) != 1): raise KeyError |
37 |
< |
except KeyError: |
38 |
< |
raise CrabException('You can not publish data because you did not selected *** copy_data = 1 *** in the crab.cfg file') |
39 |
< |
try: |
40 |
< |
self.pset = cfg_params['CMSSW.pset'] |
41 |
< |
except KeyError: |
35 |
> |
if (not cfg_params.has_key('USER.copy_data') or int(cfg_params['USER.copy_data']) != 1 ) or \ |
36 |
> |
(not cfg_params.has_key('USER.publish_data') or int(cfg_params['USER.publish_data']) != 1 ): |
37 |
> |
msg = 'You can not publish data because you did not selected \n' |
38 |
> |
msg += '\t*** copy_data = 1 and publish_data = 1 *** in the crab.cfg file' |
39 |
> |
raise CrabException(msg) |
40 |
> |
|
41 |
> |
if not cfg_params.has_key('CMSSW.pset'): |
42 |
|
raise CrabException('Cannot publish output data, because you did not specify the psetname in [CMSSW] of your crab.cfg file') |
43 |
< |
try: |
44 |
< |
self.globalDBS=cfg_params['CMSSW.dbs_url'] |
45 |
< |
except KeyError: |
46 |
< |
self.globalDBS="http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet" |
47 |
< |
try: |
45 |
< |
self.DBSURL=cfg_params['USER.dbs_url_for_publication'] |
46 |
< |
common.logger.message('<dbs_url_for_publication> = '+self.DBSURL) |
47 |
< |
if (self.DBSURL == "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet") or (self.DBSURL == "https://cmsdbsprod.cern.ch:8443/cms_dbs_prod_global_writer/servlet/DBSServlet"): |
48 |
< |
msg = "You can not publish your data in the globalDBS = " + self.DBSURL + "\n" |
49 |
< |
msg = msg + "Please write your local one in the [USER] section 'dbs_url_for_publication'" |
50 |
< |
raise CrabException(msg) |
51 |
< |
except KeyError: |
43 |
> |
self.pset = cfg_params['CMSSW.pset'] |
44 |
> |
|
45 |
> |
self.globalDBS=cfg_params.get('CMSSW.dbs_url',"http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet") |
46 |
> |
|
47 |
> |
if not cfg_params.has_key('USER.dbs_url_for_publication'): |
48 |
|
msg = "Warning. The [USER] section does not have 'dbs_url_for_publication'" |
49 |
|
msg = msg + " entry, necessary to publish the data.\n" |
50 |
|
msg = msg + "Use the command **crab -publish -USER.dbs_url_for_publication=dbs_url_for_publication*** \nwhere dbs_url_for_publication is your local dbs instance." |
51 |
|
raise CrabException(msg) |
52 |
+ |
|
53 |
+ |
self.DBSURL=cfg_params['USER.dbs_url_for_publication'] |
54 |
+ |
common.logger.info('<dbs_url_for_publication> = '+self.DBSURL) |
55 |
+ |
if (self.DBSURL == "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet") or (self.DBSURL == "https://cmsdbsprod.cern.ch:8443/cms_dbs_prod_global_writer/servlet/DBSServlet"): |
56 |
+ |
msg = "You can not publish your data in the globalDBS = " + self.DBSURL + "\n" |
57 |
+ |
msg = msg + "Please write your local one in the [USER] section 'dbs_url_for_publication'" |
58 |
+ |
raise CrabException(msg) |
59 |
|
|
60 |
|
self.content=file(self.pset).read() |
61 |
|
self.resDir = common.work_space.resDir() |
74 |
|
dataset=string.strip(dataset) |
75 |
|
self.dataset_to_import.append(dataset) |
76 |
|
### |
77 |
< |
|
77 |
> |
|
78 |
> |
self.import_all_parents = cfg_params.get('USER.publish_with_import_all_parents',1) |
79 |
> |
self.skipOcheck=cfg_params.get('CMSSW.publish_zero_event',0) |
80 |
> |
|
81 |
|
self.SEName='' |
82 |
|
self.CMSSW_VERSION='' |
83 |
|
self.exit_status='' |
88 |
|
|
89 |
|
def importParentDataset(self,globalDBS, datasetpath): |
90 |
|
""" |
91 |
< |
""" |
91 |
> |
""" |
92 |
|
dbsWriter = DBSWriter(self.DBSURL,level='ERROR') |
93 |
|
|
94 |
|
try: |
95 |
< |
dbsWriter.importDatasetWithoutParentage(globalDBS, self.datasetpath, self.DBSURL) |
95 |
> |
if (self.import_all_parents==1): |
96 |
> |
common.logger.info("--->>> Importing all parents level") |
97 |
> |
start = time.time() |
98 |
> |
common.logger.debug("start import time: " + str(start)) |
99 |
> |
### to skip the ProdCommon api exception in the case of block without location |
100 |
> |
### skipNoSiteError=True |
101 |
> |
#dbsWriter.importDataset(globalDBS, datasetpath, self.DBSURL, skipNoSiteError=True) |
102 |
> |
### calling dbs api directly |
103 |
> |
dbsWriter.dbs.migrateDatasetContents(globalDBS, self.DBSURL, datasetpath) |
104 |
> |
stop = time.time() |
105 |
> |
common.logger.debug("stop import time: " + str(stop)) |
106 |
> |
common.logger.info("--->>> duration of all parents import (sec): "+str(stop - start)) |
107 |
> |
|
108 |
> |
else: |
109 |
> |
common.logger.info("--->>> Importing only the datasetpath " + datasetpath) |
110 |
> |
start = time.time() |
111 |
> |
#dbsWriter.importDatasetWithoutParentage(globalDBS, datasetpath, self.DBSURL, skipNoSiteError=True) |
112 |
> |
### calling dbs api directly |
113 |
> |
common.logger.debug("start import time: " + str(start)) |
114 |
> |
dbsWriter.dbs.migrateDatasetContents(globalDBS, self.DBSURL, datasetpath, noParentsReadOnly = True ) |
115 |
> |
stop = time.time() |
116 |
> |
common.logger.debug("stop import time: " + str(stop)) |
117 |
> |
common.logger.info("--->>> duration of first level parent import (sec): "+str(stop - start)) |
118 |
|
except DBSWriterError, ex: |
119 |
|
msg = "Error importing dataset to be processed into local DBS\n" |
120 |
|
msg += "Source Dataset: %s\n" % datasetpath |
121 |
|
msg += "Source DBS: %s\n" % globalDBS |
122 |
|
msg += "Destination DBS: %s\n" % self.DBSURL |
123 |
< |
common.logger.message(msg) |
123 |
> |
common.logger.info(msg) |
124 |
> |
common.logger.info(str(ex)) |
125 |
|
return 1 |
126 |
|
return 0 |
127 |
< |
|
127 |
> |
|
128 |
|
def publishDataset(self,file): |
129 |
|
""" |
130 |
|
""" |
134 |
|
except IndexError: |
135 |
|
self.exit_status = '1' |
136 |
|
msg = "Error: Problem with "+file+" file" |
137 |
< |
common.logger.message(msg) |
137 |
> |
common.logger.info(msg) |
138 |
|
return self.exit_status |
139 |
|
|
140 |
|
if (len(self.dataset_to_import) != 0): |
141 |
|
for dataset in self.dataset_to_import: |
142 |
< |
common.logger.message("--->>> Importing parent dataset in the dbs: " +dataset) |
142 |
> |
common.logger.info("--->>> Importing parent dataset in the dbs: " +dataset) |
143 |
|
status_import=self.importParentDataset(self.globalDBS, dataset) |
144 |
|
if (status_import == 1): |
145 |
< |
common.logger.message('Problem with parent '+ dataset +' import from the global DBS '+self.globalDBS+ 'to the local one '+self.DBSURL) |
145 |
> |
common.logger.info('Problem with parent '+ dataset +' import from the global DBS '+self.globalDBS+ 'to the local one '+self.DBSURL) |
146 |
|
self.exit_status='1' |
147 |
|
return self.exit_status |
148 |
|
else: |
149 |
< |
common.logger.message('Import ok of dataset '+dataset) |
149 |
> |
common.logger.info('Import ok of dataset '+dataset) |
150 |
|
|
151 |
|
#// DBS to contact |
152 |
|
dbswriter = DBSWriter(self.DBSURL) |
155 |
|
self.exit_status = '0' |
156 |
|
except IndexError: |
157 |
|
self.exit_status = '1' |
158 |
< |
msg = "Error: No file to publish in xml file"+file+" file" |
159 |
< |
common.logger.message(msg) |
158 |
> |
msg = "Error: No EDM file to publish in xml file"+file+" file" |
159 |
> |
common.logger.info(msg) |
160 |
|
return self.exit_status |
161 |
|
|
162 |
|
datasets=fileinfo.dataset |
163 |
< |
common.logger.debug(6,"FileInfo = " + str(fileinfo)) |
164 |
< |
common.logger.debug(6,"DatasetInfo = " + str(datasets)) |
163 |
> |
common.logger.log(10-1,"FileInfo = " + str(fileinfo)) |
164 |
> |
common.logger.log(10-1,"DatasetInfo = " + str(datasets)) |
165 |
> |
if len(datasets)<=0: |
166 |
> |
self.exit_status = '1' |
167 |
> |
msg = "Error: No info about dataset in the xml file "+file |
168 |
> |
common.logger.info(msg) |
169 |
> |
return self.exit_status |
170 |
|
for dataset in datasets: |
171 |
|
#### for production data |
172 |
|
self.processedData = dataset['ProcessedDataset'] |
173 |
|
if (dataset['PrimaryDataset'] == 'null'): |
174 |
< |
dataset['PrimaryDataset'] = dataset['ProcessedDataset'] |
175 |
< |
else: # add parentage from input dataset |
174 |
> |
#dataset['PrimaryDataset'] = dataset['ProcessedDataset'] |
175 |
> |
dataset['PrimaryDataset'] = self.userprocessedData |
176 |
> |
#else: # add parentage from input dataset |
177 |
> |
elif self.datasetpath.upper() != 'NONE': |
178 |
|
dataset['ParentDataset']= self.datasetpath |
179 |
|
|
180 |
|
dataset['PSetContent']=self.content |
181 |
|
cfgMeta = {'name' : self.pset , 'Type' : 'user' , 'annotation': 'user cfg', 'version' : 'private version'} # add real name of user cfg |
182 |
< |
common.logger.message("PrimaryDataset = %s"%dataset['PrimaryDataset']) |
183 |
< |
common.logger.message("ProcessedDataset = %s"%dataset['ProcessedDataset']) |
184 |
< |
common.logger.message("<User Dataset Name> = /"+dataset['PrimaryDataset']+"/"+dataset['ProcessedDataset']+"/USER") |
182 |
> |
common.logger.info("PrimaryDataset = %s"%dataset['PrimaryDataset']) |
183 |
> |
common.logger.info("ProcessedDataset = %s"%dataset['ProcessedDataset']) |
184 |
> |
common.logger.info("<User Dataset Name> = /"+dataset['PrimaryDataset']+"/"+dataset['ProcessedDataset']+"/USER") |
185 |
> |
self.dataset_to_check="/"+dataset['PrimaryDataset']+"/"+dataset['ProcessedDataset']+"/USER" |
186 |
|
|
187 |
< |
common.logger.debug(6,"--->>> Inserting primary: %s processed : %s"%(dataset['PrimaryDataset'],dataset['ProcessedDataset'])) |
187 |
> |
common.logger.log(10-1,"--->>> Inserting primary: %s processed : %s"%(dataset['PrimaryDataset'],dataset['ProcessedDataset'])) |
188 |
|
|
189 |
|
primary = DBSWriterObjects.createPrimaryDataset( dataset, dbswriter.dbs) |
190 |
< |
common.logger.debug(6,"Primary: %s "%primary) |
190 |
> |
common.logger.log(10-1,"Primary: %s "%primary) |
191 |
|
|
192 |
|
algo = DBSWriterObjects.createAlgorithm(dataset, cfgMeta, dbswriter.dbs) |
193 |
< |
common.logger.debug(6,"Algo: %s "%algo) |
193 |
> |
common.logger.log(10-1,"Algo: %s "%algo) |
194 |
|
|
195 |
|
processed = DBSWriterObjects.createProcessedDataset(primary, algo, dataset, dbswriter.dbs) |
196 |
< |
common.logger.debug(6,"Processed: %s "%processed) |
196 |
> |
common.logger.log(10-1,"Processed: %s "%processed) |
197 |
|
|
198 |
< |
common.logger.debug(6,"Inserted primary %s processed %s"%(primary,processed)) |
198 |
> |
common.logger.log(10-1,"Inserted primary %s processed %s"%(primary,processed)) |
199 |
|
|
200 |
< |
common.logger.debug(6,"exit_status = %s "%self.exit_status) |
200 |
> |
common.logger.log(10-1,"exit_status = %s "%self.exit_status) |
201 |
|
return self.exit_status |
202 |
|
|
203 |
|
def publishAJobReport(self,file,procdataset): |
204 |
|
""" |
205 |
|
input: xml file, processedDataset |
206 |
|
""" |
207 |
+ |
common.logger.debug("FJR = %s"%file) |
208 |
|
try: |
209 |
|
jobReport = readJobReport(file)[0] |
210 |
|
self.exit_status = '0' |
223 |
|
elif (file['LFN'] == ''): |
224 |
|
self.noLFN.append(file['PFN']) |
225 |
|
else: |
226 |
< |
if int(file['TotalEvents']) != 0 : |
227 |
< |
#file.lumisections = {} |
228 |
< |
# lumi info are now in run hash |
226 |
> |
if self.skipOcheck==0: |
227 |
> |
if int(file['TotalEvents']) != 0: |
228 |
> |
#file.lumisections = {} |
229 |
> |
# lumi info are now in run hash |
230 |
> |
file.runs = {} |
231 |
> |
for ds in file.dataset: |
232 |
> |
### Fede for production |
233 |
> |
if (ds['PrimaryDataset'] == 'null'): |
234 |
> |
#ds['PrimaryDataset']=procdataset |
235 |
> |
ds['PrimaryDataset']=self.userprocessedData |
236 |
> |
filestopublish.append(file) |
237 |
> |
else: |
238 |
> |
self.noEventsFiles.append(file['LFN']) |
239 |
> |
else: |
240 |
|
file.runs = {} |
241 |
|
for ds in file.dataset: |
193 |
– |
### FEDE FOR NEW LFN ### |
194 |
– |
#ds['ProcessedDataset']=procdataset |
195 |
– |
######################## |
242 |
|
### Fede for production |
243 |
|
if (ds['PrimaryDataset'] == 'null'): |
244 |
< |
ds['PrimaryDataset']=procdataset |
244 |
> |
#ds['PrimaryDataset']=procdataset |
245 |
> |
ds['PrimaryDataset']=self.userprocessedData |
246 |
|
filestopublish.append(file) |
247 |
< |
else: |
201 |
< |
self.noEventsFiles.append(file['LFN']) |
247 |
> |
|
248 |
|
jobReport.files = filestopublish |
249 |
+ |
for file in filestopublish: |
250 |
+ |
common.logger.debug("--->>> LFN of file to publish = " + str(file['LFN'])) |
251 |
|
### if all files of FJR have number of events = 0 |
252 |
|
if (len(filestopublish) == 0): |
253 |
< |
return None |
253 |
> |
return None |
254 |
|
|
255 |
|
#// DBS to contact |
256 |
|
dbswriter = DBSWriter(self.DBSURL) |
258 |
|
Blocks=None |
259 |
|
try: |
260 |
|
Blocks=dbswriter.insertFiles(jobReport) |
261 |
< |
common.logger.message("Blocks = %s"%Blocks) |
261 |
> |
common.logger.debug("--->>> Inserting file in blocks = %s"%Blocks) |
262 |
|
except DBSWriterError, ex: |
263 |
< |
common.logger.message("Insert file error: %s"%ex) |
263 |
> |
common.logger.debug("--->>> Insert file error: %s"%ex) |
264 |
|
return Blocks |
265 |
|
|
266 |
|
def run(self): |
269 |
|
""" |
270 |
|
|
271 |
|
file_list = glob.glob(self.resDir+"crab_fjr*.xml") |
272 |
< |
common.logger.debug(6, "file_list = "+str(file_list)) |
273 |
< |
common.logger.debug(6, "len(file_list) = "+str(len(file_list))) |
272 |
> |
|
273 |
> |
## Select only those fjr that are succesfull |
274 |
> |
if (len(file_list)==0): |
275 |
> |
common.logger.info("--->>> "+self.resDir+" empty: no file to publish on DBS") |
276 |
> |
self.exit_status = '1' |
277 |
> |
return self.exit_status |
278 |
> |
|
279 |
> |
good_list=[] |
280 |
> |
for fjr in file_list: |
281 |
> |
reports = readJobReport(fjr) |
282 |
> |
if len(reports)>0: |
283 |
> |
if reports[0].status == "Success": |
284 |
> |
good_list.append(fjr) |
285 |
> |
file_list=good_list |
286 |
> |
## |
287 |
> |
common.logger.log(10-1, "file_list = "+str(file_list)) |
288 |
> |
common.logger.log(10-1, "len(file_list) = "+str(len(file_list))) |
289 |
|
|
290 |
|
if (len(file_list)>0): |
291 |
|
BlocksList=[] |
292 |
< |
common.logger.message("--->>> Start dataset publication") |
292 |
> |
common.logger.info("--->>> Start dataset publication") |
293 |
|
self.exit_status=self.publishDataset(file_list[0]) |
294 |
|
if (self.exit_status == '1'): |
295 |
|
return self.exit_status |
296 |
< |
common.logger.message("--->>> End dataset publication") |
296 |
> |
common.logger.info("--->>> End dataset publication") |
297 |
|
|
298 |
|
|
299 |
< |
common.logger.message("--->>> Start files publication") |
299 |
> |
common.logger.info("--->>> Start files publication") |
300 |
|
for file in file_list: |
238 |
– |
common.logger.message("file = "+file) |
301 |
|
Blocks=self.publishAJobReport(file,self.processedData) |
302 |
|
if Blocks: |
303 |
|
for x in Blocks: # do not allow multiple entries of the same block |
305 |
|
BlocksList.append(x) |
306 |
|
|
307 |
|
# close the blocks |
308 |
< |
common.logger.debug(6, "BlocksList = %s"%BlocksList) |
308 |
> |
common.logger.log(10-1, "BlocksList = %s"%BlocksList) |
309 |
|
# dbswriter = DBSWriter(self.DBSURL,level='ERROR') |
310 |
|
dbswriter = DBSWriter(self.DBSURL) |
311 |
|
|
312 |
|
for BlockName in BlocksList: |
313 |
|
try: |
314 |
|
closeBlock=dbswriter.manageFileBlock(BlockName,maxFiles= 1) |
315 |
< |
common.logger.debug(6, "closeBlock %s"%closeBlock) |
315 |
> |
common.logger.log(10-1, "closeBlock %s"%closeBlock) |
316 |
|
#dbswriter.dbs.closeBlock(BlockName) |
317 |
|
except DBSWriterError, ex: |
318 |
< |
common.logger.message("Close block error %s"%ex) |
318 |
> |
common.logger.info("Close block error %s"%ex) |
319 |
|
|
320 |
|
if (len(self.noEventsFiles)>0): |
321 |
< |
common.logger.message("--->>> WARNING: "+str(len(self.noEventsFiles))+" files not published because they contain 0 events are:") |
321 |
> |
common.logger.info("--->>> WARNING: "+str(len(self.noEventsFiles))+" files not published because they contain 0 events are:") |
322 |
|
for lfn in self.noEventsFiles: |
323 |
< |
common.logger.message("------ LFN: %s"%lfn) |
323 |
> |
common.logger.info("------ LFN: %s"%lfn) |
324 |
|
if (len(self.noLFN)>0): |
325 |
< |
common.logger.message("--->>> WARNING: there are "+str(len(self.noLFN))+" files not published because they have empty LFN") |
325 |
> |
common.logger.info("--->>> WARNING: there are "+str(len(self.noLFN))+" files not published because they have empty LFN") |
326 |
|
for pfn in self.noLFN: |
327 |
< |
common.logger.message("------ pfn: %s"%pfn) |
327 |
> |
common.logger.info("------ pfn: %s"%pfn) |
328 |
|
if (len(self.problemFiles)>0): |
329 |
< |
common.logger.message("--->>> WARNING: "+str(len(self.problemFiles))+" files not published because they had problem with copy to SE") |
329 |
> |
common.logger.info("--->>> WARNING: "+str(len(self.problemFiles))+" files not published because they had problem with copy to SE") |
330 |
|
for lfn in self.problemFiles: |
331 |
< |
common.logger.message("------ LFN: %s"%lfn) |
332 |
< |
common.logger.message("--->>> End files publication") |
333 |
< |
common.logger.message("--->>> To check data publication please use: InspectDBS2.py --DBSURL=<dbs_url_for_publication> --datasetPath=<User Dataset Name>") |
331 |
> |
common.logger.info("------ LFN: %s"%lfn) |
332 |
> |
common.logger.info("--->>> End files publication") |
333 |
> |
|
334 |
> |
self.cfg_params['USER.dataset_to_check']=self.dataset_to_check |
335 |
> |
from InspectDBS import InspectDBS |
336 |
> |
check=InspectDBS(self.cfg_params) |
337 |
> |
check.checkPublication() |
338 |
|
return self.exit_status |
339 |
|
|
340 |
|
else: |
341 |
< |
common.logger.message("--->>> "+self.resDir+" empty: no file to publish on DBS") |
341 |
> |
common.logger.info("--->>> No valid files to publish on DBS. Your jobs do not report exit codes = 0") |
342 |
|
self.exit_status = '1' |
343 |
|
return self.exit_status |
344 |
|
|