1 |
|
from JobType import JobType |
2 |
– |
from crab_logger import Logger |
2 |
|
from crab_exceptions import * |
3 |
|
from crab_util import * |
4 |
|
import common |
6 |
– |
import PsetManipulator |
7 |
– |
|
8 |
– |
import DBSInfo_EDM |
9 |
– |
#from DataDiscovery_EDM import DataDiscovery_EDM |
10 |
– |
import DataDiscovery_EDM |
11 |
– |
#from DataLocation_EDM import DataLocation_EDM |
12 |
– |
import DataLocation_EDM |
5 |
|
import Scram |
6 |
+ |
from Splitter import JobSplitter |
7 |
|
|
8 |
< |
import os, string, re |
8 |
> |
from IMProv.IMProvNode import IMProvNode |
9 |
> |
import os, string, glob |
10 |
|
|
11 |
|
class Cmssw(JobType): |
12 |
< |
def __init__(self, cfg_params): |
12 |
> |
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew): |
13 |
|
JobType.__init__(self, 'CMSSW') |
14 |
< |
common.logger.debug(3,'CMSSW::__init__') |
14 |
> |
common.logger.debug('CMSSW::__init__') |
15 |
> |
self.skip_blocks = skip_blocks |
16 |
> |
self.argsList = 1 |
17 |
|
|
22 |
– |
self.analisys_common_info = {} |
23 |
– |
# Marco. |
18 |
|
self._params = {} |
19 |
|
self.cfg_params = cfg_params |
20 |
|
|
21 |
< |
log = common.logger |
22 |
< |
|
21 |
> |
### Temporary patch to automatically skip the ISB size check: |
22 |
> |
server=self.cfg_params.get('CRAB.server_name',None) |
23 |
> |
size = 9.5 |
24 |
> |
if server or common.scheduler.name().upper() in ['LSF','CAF']: size = 99999 |
25 |
> |
### D.S. |
26 |
> |
self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size)) |
27 |
> |
|
28 |
> |
# number of jobs requested to be created, limit obj splitting |
29 |
> |
self.ncjobs = ncjobs |
30 |
> |
|
31 |
|
self.scram = Scram.Scram(cfg_params) |
30 |
– |
scramArea = '' |
32 |
|
self.additional_inbox_files = [] |
33 |
|
self.scriptExe = '' |
34 |
|
self.executable = '' |
35 |
< |
self.tgz_name = 'default.tgz' |
36 |
< |
|
35 |
> |
self.executable_arch = self.scram.getArch() |
36 |
> |
self.tgz_name = 'default.tar.gz' |
37 |
> |
self.tar_name = 'default.tar' |
38 |
> |
self.scriptName = 'CMSSW.sh' |
39 |
> |
self.pset = '' |
40 |
> |
self.datasetPath = '' |
41 |
> |
|
42 |
> |
self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name |
43 |
> |
# set FJR file name |
44 |
> |
self.fjrFileName = 'crab_fjr.xml' |
45 |
|
|
46 |
|
self.version = self.scram.getSWVersion() |
47 |
< |
common.analisys_common_info['sw_version'] = self.version |
39 |
< |
### FEDE |
40 |
< |
common.analisys_common_info['copy_input_data'] = 0 |
41 |
< |
common.analisys_common_info['events_management'] = 1 |
47 |
> |
common.logger.log(10-1,"CMSSW version is: "+str(self.version)) |
48 |
|
|
49 |
< |
### collect Data cards |
49 |
> |
version_array = self.version.split('_') |
50 |
> |
self.CMSSW_major = 0 |
51 |
> |
self.CMSSW_minor = 0 |
52 |
> |
self.CMSSW_patch = 0 |
53 |
|
try: |
54 |
< |
# self.owner = cfg_params['CMSSW.owner'] |
55 |
< |
# log.debug(6, "CMSSW::CMSSW(): owner = "+self.owner) |
56 |
< |
# self.dataset = cfg_params['CMSSW.dataset'] |
57 |
< |
self.datasetPath = cfg_params['CMSSW.datasetpath'] |
58 |
< |
log.debug(6, "CMSSW::CMSSW(): datasetPath = "+self.datasetPath) |
59 |
< |
except KeyError: |
60 |
< |
# msg = "Error: owner and/or dataset not defined " |
61 |
< |
msg = "Error: datasetpath not defined " |
54 |
> |
self.CMSSW_major = int(version_array[1]) |
55 |
> |
self.CMSSW_minor = int(version_array[2]) |
56 |
> |
self.CMSSW_patch = int(version_array[3]) |
57 |
> |
except: |
58 |
> |
msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!" |
59 |
> |
raise CrabException(msg) |
60 |
> |
|
61 |
> |
if self.CMSSW_major < 1 or (self.CMSSW_major == 1 and self.CMSSW_minor < 5): |
62 |
> |
msg = "CRAB supports CMSSW >= 1_5_x only. Use an older CRAB version." |
63 |
|
raise CrabException(msg) |
64 |
+ |
""" |
65 |
+ |
As CMSSW versions are dropped we can drop more code: |
66 |
+ |
1.X dropped: drop support for running .cfg on WN |
67 |
+ |
2.0 dropped: drop all support for cfg here and in writeCfg |
68 |
+ |
2.0 dropped: Recheck the random number seed support |
69 |
+ |
""" |
70 |
+ |
|
71 |
+ |
### collect Data cards |
72 |
+ |
|
73 |
+ |
|
74 |
+ |
### Temporary: added to remove input file control in the case of PU |
75 |
+ |
self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None) |
76 |
+ |
|
77 |
+ |
tmp = cfg_params['CMSSW.datasetpath'] |
78 |
+ |
common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp) |
79 |
+ |
|
80 |
+ |
if tmp =='': |
81 |
+ |
msg = "Error: datasetpath not defined " |
82 |
+ |
raise CrabException(msg) |
83 |
+ |
elif string.lower(tmp)=='none': |
84 |
+ |
self.datasetPath = None |
85 |
+ |
self.selectNoInput = 1 |
86 |
+ |
else: |
87 |
+ |
self.datasetPath = tmp |
88 |
+ |
self.selectNoInput = 0 |
89 |
+ |
|
90 |
|
self.dataTiers = [] |
91 |
< |
# try: |
92 |
< |
# tmpDataTiers = string.split(cfg_params['CMSSW.data_tier'],',') |
93 |
< |
# for tmp in tmpDataTiers: |
94 |
< |
# tmp=string.strip(tmp) |
59 |
< |
# self.dataTiers.append(tmp) |
60 |
< |
# pass |
61 |
< |
# pass |
62 |
< |
# except KeyError: |
63 |
< |
# pass |
64 |
< |
# log.debug(6, "Cmssw::Cmssw(): dataTiers = "+str(self.dataTiers)) |
91 |
> |
|
92 |
> |
self.debugWrap='' |
93 |
> |
self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0)) |
94 |
> |
if self.debug_wrapper == 1: self.debugWrap='--debug' |
95 |
|
|
96 |
|
## now the application |
97 |
< |
try: |
98 |
< |
self.executable = cfg_params['CMSSW.executable'] |
99 |
< |
log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable) |
100 |
< |
msg = "Default executable cmsRun overridden. Switch to " + self.executable |
71 |
< |
log.debug(3,msg) |
72 |
< |
except KeyError: |
73 |
< |
self.executable = 'cmsRun' |
74 |
< |
msg = "User executable not defined. Use cmsRun" |
75 |
< |
log.debug(3,msg) |
76 |
< |
pass |
97 |
> |
self.managedGenerators = ['madgraph', 'comphep', 'lhe'] |
98 |
> |
self.generator = cfg_params.get('CMSSW.generator','pythia').lower() |
99 |
> |
self.executable = cfg_params.get('CMSSW.executable','cmsRun') |
100 |
> |
common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable) |
101 |
|
|
102 |
< |
try: |
103 |
< |
self.pset = cfg_params['CMSSW.pset'] |
104 |
< |
log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset) |
102 |
> |
if not cfg_params.has_key('CMSSW.pset'): |
103 |
> |
raise CrabException("PSet file missing. Cannot run cmsRun ") |
104 |
> |
self.pset = cfg_params['CMSSW.pset'] |
105 |
> |
common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset) |
106 |
> |
if self.pset.lower() != 'none' : |
107 |
|
if (not os.path.exists(self.pset)): |
108 |
|
raise CrabException("User defined PSet file "+self.pset+" does not exist") |
109 |
< |
except KeyError: |
110 |
< |
raise CrabException("PSet file missing. Cannot run cmsRun ") |
109 |
> |
else: |
110 |
> |
self.pset = None |
111 |
|
|
112 |
|
# output files |
113 |
< |
try: |
114 |
< |
self.output_file = [] |
113 |
> |
## stuff which must be returned always via sandbox |
114 |
> |
self.output_file_sandbox = [] |
115 |
|
|
116 |
< |
tmp = cfg_params['CMSSW.output_file'] |
117 |
< |
if tmp != '': |
118 |
< |
tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',') |
119 |
< |
log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles)) |
120 |
< |
for tmp in tmpOutFiles: |
121 |
< |
tmp=string.strip(tmp) |
122 |
< |
self.output_file.append(tmp) |
123 |
< |
pass |
124 |
< |
else: |
125 |
< |
log.message("No output file defined: only stdout/err will be available") |
126 |
< |
pass |
127 |
< |
pass |
102 |
< |
except KeyError: |
103 |
< |
log.message("No output file defined: only stdout/err will be available") |
104 |
< |
pass |
116 |
> |
# add fjr report by default via sandbox |
117 |
> |
self.output_file_sandbox.append(self.fjrFileName) |
118 |
> |
|
119 |
> |
# other output files to be returned via sandbox or copied to SE |
120 |
> |
outfileflag = False |
121 |
> |
self.output_file = [] |
122 |
> |
tmp = cfg_params.get('CMSSW.output_file',None) |
123 |
> |
if tmp : |
124 |
> |
self.output_file = [x.strip() for x in tmp.split(',')] |
125 |
> |
outfileflag = True #output found |
126 |
> |
#else: |
127 |
> |
# log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n") |
128 |
|
|
129 |
|
# script_exe file as additional file in inputSandbox |
130 |
< |
try: |
131 |
< |
self.scriptExe = cfg_params['USER.script_exe'] |
132 |
< |
self.additional_inbox_files.append(self.scriptExe) |
133 |
< |
except KeyError: |
134 |
< |
pass |
135 |
< |
if self.scriptExe != '': |
113 |
< |
if os.path.isfile(self.scriptExe): |
114 |
< |
pass |
115 |
< |
else: |
116 |
< |
log.message("WARNING. file "+self.scriptExe+" not found") |
117 |
< |
sys.exit() |
118 |
< |
|
119 |
< |
## additional input files |
120 |
< |
try: |
121 |
< |
tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',') |
122 |
< |
for tmp in tmpAddFiles: |
123 |
< |
if not os.path.exists(tmp): |
124 |
< |
raise CrabException("Additional input file not found: "+tmp) |
125 |
< |
tmp=string.strip(tmp) |
126 |
< |
self.additional_inbox_files.append(tmp) |
127 |
< |
pass |
128 |
< |
pass |
129 |
< |
except KeyError: |
130 |
< |
pass |
130 |
> |
self.scriptExe = cfg_params.get('USER.script_exe',None) |
131 |
> |
if self.scriptExe : |
132 |
> |
if not os.path.isfile(self.scriptExe): |
133 |
> |
msg ="ERROR. file "+self.scriptExe+" not found" |
134 |
> |
raise CrabException(msg) |
135 |
> |
self.additional_inbox_files.append(string.strip(self.scriptExe)) |
136 |
|
|
137 |
< |
try: |
138 |
< |
self.filesPerJob = int(cfg_params['CMSSW.files_per_jobs']) #Daniele |
134 |
< |
except KeyError: |
135 |
< |
self.filesPerJob = 1 |
137 |
> |
self.AdditionalArgs = cfg_params.get('USER.script_arguments',None) |
138 |
> |
if self.AdditionalArgs : self.AdditionalArgs = string.replace(self.AdditionalArgs,',',' ') |
139 |
|
|
140 |
< |
## Max event will be total_number_of_events ??? Daniele |
141 |
< |
try: |
139 |
< |
self.maxEv = cfg_params['CMSSW.event_per_job'] |
140 |
< |
except KeyError: |
141 |
< |
self.maxEv = "-1" |
142 |
< |
## |
143 |
< |
try: |
144 |
< |
self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events']) |
145 |
< |
except KeyError: |
146 |
< |
msg = 'Must define total_number_of_events' |
140 |
> |
if self.datasetPath == None and self.pset == None and self.scriptExe == '' : |
141 |
> |
msg ="Error. script_exe not defined" |
142 |
|
raise CrabException(msg) |
148 |
– |
|
149 |
– |
CEBlackList = [] |
150 |
– |
try: |
151 |
– |
tmpBad = string.split(cfg_params['EDG.ce_black_list'],',') |
152 |
– |
for tmp in tmpBad: |
153 |
– |
tmp=string.strip(tmp) |
154 |
– |
CEBlackList.append(tmp) |
155 |
– |
except KeyError: |
156 |
– |
pass |
157 |
– |
|
158 |
– |
self.reCEBlackList=[] |
159 |
– |
for bad in CEBlackList: |
160 |
– |
self.reCEBlackList.append(re.compile( bad )) |
143 |
|
|
144 |
< |
common.logger.debug(5,'CEBlackList: '+str(CEBlackList)) |
144 |
> |
# use parent files... |
145 |
> |
self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0)) |
146 |
|
|
147 |
< |
CEWhiteList = [] |
148 |
< |
try: |
149 |
< |
tmpGood = string.split(cfg_params['EDG.ce_white_list'],',') |
150 |
< |
for tmp in tmpGood: |
151 |
< |
tmp=string.strip(tmp) |
152 |
< |
CEWhiteList.append(tmp) |
153 |
< |
except KeyError: |
147 |
> |
## additional input files |
148 |
> |
if cfg_params.has_key('USER.additional_input_files'): |
149 |
> |
tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',') |
150 |
> |
for tmp in tmpAddFiles: |
151 |
> |
tmp = string.strip(tmp) |
152 |
> |
dirname = '' |
153 |
> |
if not tmp[0]=="/": dirname = "." |
154 |
> |
files = [] |
155 |
> |
if string.find(tmp,"*")>-1: |
156 |
> |
files = glob.glob(os.path.join(dirname, tmp)) |
157 |
> |
if len(files)==0: |
158 |
> |
raise CrabException("No additional input file found with this pattern: "+tmp) |
159 |
> |
else: |
160 |
> |
files.append(tmp) |
161 |
> |
for file in files: |
162 |
> |
if not os.path.exists(file): |
163 |
> |
raise CrabException("Additional input file not found: "+file) |
164 |
> |
pass |
165 |
> |
self.additional_inbox_files.append(string.strip(file)) |
166 |
> |
pass |
167 |
|
pass |
168 |
+ |
common.logger.debug("Additional input files: "+str(self.additional_inbox_files)) |
169 |
+ |
pass |
170 |
|
|
173 |
– |
#print 'CEWhiteList: ',CEWhiteList |
174 |
– |
self.reCEWhiteList=[] |
175 |
– |
for Good in CEWhiteList: |
176 |
– |
self.reCEWhiteList.append(re.compile( Good )) |
177 |
– |
|
178 |
– |
common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList)) |
179 |
– |
|
180 |
– |
self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset |
171 |
|
|
172 |
+ |
## New method of dealing with seeds |
173 |
+ |
self.incrementSeeds = [] |
174 |
+ |
self.preserveSeeds = [] |
175 |
+ |
if cfg_params.has_key('CMSSW.preserve_seeds'): |
176 |
+ |
tmpList = cfg_params['CMSSW.preserve_seeds'].split(',') |
177 |
+ |
for tmp in tmpList: |
178 |
+ |
tmp.strip() |
179 |
+ |
self.preserveSeeds.append(tmp) |
180 |
+ |
if cfg_params.has_key('CMSSW.increment_seeds'): |
181 |
+ |
tmpList = cfg_params['CMSSW.increment_seeds'].split(',') |
182 |
+ |
for tmp in tmpList: |
183 |
+ |
tmp.strip() |
184 |
+ |
self.incrementSeeds.append(tmp) |
185 |
+ |
|
186 |
+ |
self.firstRun = cfg_params.get('CMSSW.first_run',None) |
187 |
+ |
|
188 |
+ |
# Copy/return |
189 |
+ |
self.copy_data = int(cfg_params.get('USER.copy_data',0)) |
190 |
+ |
self.return_data = int(cfg_params.get('USER.return_data',0)) |
191 |
+ |
|
192 |
+ |
self.conf = {} |
193 |
+ |
self.conf['pubdata'] = None |
194 |
+ |
# number of jobs requested to be created, limit obj splitting DD |
195 |
|
#DBSDLS-start |
196 |
< |
## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code |
196 |
> |
## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code |
197 |
|
self.maxEvents=0 # max events available ( --> check the requested nb. of evts in Creator.py) |
198 |
|
self.DBSPaths={} # all dbs paths requested ( --> input to the site local discovery script) |
199 |
+ |
self.jobDestination=[] # Site destination(s) for each job (list of lists) |
200 |
|
## Perform the data location and discovery (based on DBS/DLS) |
201 |
< |
self.DataDiscoveryAndLocation(cfg_params) |
202 |
< |
#DBSDLS-end |
201 |
> |
## SL: Don't if NONE is specified as input (pythia use case) |
202 |
> |
blockSites = {} |
203 |
> |
if self.datasetPath: |
204 |
> |
blockSites = self.DataDiscoveryAndLocation(cfg_params) |
205 |
> |
#DBSDLS-end |
206 |
> |
self.conf['blockSites']=blockSites |
207 |
> |
|
208 |
> |
## Select Splitting |
209 |
> |
splitByRun = int(cfg_params.get('CMSSW.split_by_run',0)) |
210 |
> |
|
211 |
> |
if self.selectNoInput: |
212 |
> |
if self.pset == None: |
213 |
> |
self.algo = 'ForScript' |
214 |
> |
else: |
215 |
> |
self.algo = 'NoInput' |
216 |
> |
self.conf['managedGenerators']=self.managedGenerators |
217 |
> |
self.conf['generator']=self.generator |
218 |
> |
elif splitByRun ==1: |
219 |
> |
self.algo = 'RunBased' |
220 |
> |
else: |
221 |
> |
self.algo = 'EventBased' |
222 |
> |
|
223 |
> |
# self.algo = 'LumiBased' |
224 |
> |
splitter = JobSplitter(self.cfg_params,self.conf) |
225 |
> |
self.dict = splitter.Algos()[self.algo]() |
226 |
> |
|
227 |
> |
self.argsFile= '%s/arguments.xml'%common.work_space.shareDir() |
228 |
> |
self.rootArgsFilename= 'arguments' |
229 |
> |
# modify Pset only the first time |
230 |
> |
if (isNew and self.pset != None): self.ModifyPset() |
231 |
|
|
232 |
< |
self.tgzNameWithPath = self.getTarBall(self.executable) |
232 |
> |
## Prepare inputSandbox TarBall (only the first time) |
233 |
> |
self.tarNameWithPath = self.getTarBall(self.executable) |
234 |
> |
|
235 |
> |
|
236 |
> |
def ModifyPset(self): |
237 |
> |
import PsetManipulator as pp |
238 |
> |
PsetEdit = pp.PsetManipulator(self.pset) |
239 |
> |
try: |
240 |
> |
# Add FrameworkJobReport to parameter-set, set max events. |
241 |
> |
# Reset later for data jobs by writeCFG which does all modifications |
242 |
> |
PsetEdit.maxEvent(1) |
243 |
> |
PsetEdit.skipEvent(0) |
244 |
> |
PsetEdit.psetWriter(self.configFilename()) |
245 |
> |
## If present, add TFileService to output files |
246 |
> |
if not int(self.cfg_params.get('CMSSW.skip_TFileService_output',0)): |
247 |
> |
tfsOutput = PsetEdit.getTFileService() |
248 |
> |
if tfsOutput: |
249 |
> |
if tfsOutput in self.output_file: |
250 |
> |
common.logger.debug("Output from TFileService "+tfsOutput+" already in output files") |
251 |
> |
else: |
252 |
> |
outfileflag = True #output found |
253 |
> |
self.output_file.append(tfsOutput) |
254 |
> |
common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files") |
255 |
> |
pass |
256 |
> |
pass |
257 |
> |
## If present and requested, add PoolOutputModule to output files |
258 |
> |
edmOutput = PsetEdit.getPoolOutputModule() |
259 |
> |
if int(self.cfg_params.get('CMSSW.get_edm_output',0)): |
260 |
> |
if edmOutput: |
261 |
> |
if edmOutput in self.output_file: |
262 |
> |
common.logger.debug("Output from PoolOutputModule "+edmOutput+" already in output files") |
263 |
> |
else: |
264 |
> |
self.output_file.append(edmOutput) |
265 |
> |
common.logger.info("Adding "+edmOutput+" (from PoolOutputModule) to list of output files") |
266 |
> |
pass |
267 |
> |
pass |
268 |
> |
# not required: check anyhow if present, to avoid accidental T2 overload |
269 |
> |
else: |
270 |
> |
if edmOutput and (edmOutput not in self.output_file): |
271 |
> |
msg = "ERROR: a PoolOutputModule is present in your ParameteSet %s \n"%self.pset |
272 |
> |
msg +=" but the file produced ( %s ) is not in the list of output files\n"%edmOutput |
273 |
> |
msg += "WARNING: please remove it. If you want to keep it, add the file to output_files or use CMSSW.get_edm_output\n" |
274 |
> |
if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)): |
275 |
> |
msg +=" CMSSW.ignore_edm_output==True : Hope you know what you are doing...\n" |
276 |
> |
common.logger.info(msg) |
277 |
> |
else: |
278 |
> |
raise CrabException(msg) |
279 |
> |
pass |
280 |
> |
pass |
281 |
> |
|
282 |
> |
if (PsetEdit.getBadFilesSetting()): |
283 |
> |
msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified." |
284 |
> |
common.logger.info(msg) |
285 |
> |
|
286 |
> |
except CrabException, msg: |
287 |
> |
common.logger.info(str(msg)) |
288 |
> |
msg='Error while manipulating ParameterSet (see previous message, if any): exiting...' |
289 |
> |
raise CrabException(msg) |
290 |
|
|
192 |
– |
self.jobSplitting() #Daniele job Splitting |
193 |
– |
self.PsetEdit.maxEvent(self.maxEv) #Daniele |
194 |
– |
self.PsetEdit.inputModule("INPUT") #Daniele |
195 |
– |
self.PsetEdit.psetWriter(self.configFilename()) |
291 |
|
|
292 |
|
def DataDiscoveryAndLocation(self, cfg_params): |
293 |
|
|
294 |
< |
common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()") |
294 |
> |
import DataDiscovery |
295 |
> |
import DataLocation |
296 |
> |
common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()") |
297 |
|
|
201 |
– |
#datasetPath = "/"+self.owner+"/"+self.dataTiers[0]+"/"+self.dataset |
202 |
– |
|
298 |
|
datasetPath=self.datasetPath |
299 |
|
|
205 |
– |
## TODO |
206 |
– |
dataTiersList = "" |
207 |
– |
dataTiers = dataTiersList.split(',') |
208 |
– |
|
300 |
|
## Contact the DBS |
301 |
+ |
common.logger.info("Contacting Data Discovery Services ...") |
302 |
|
try: |
303 |
< |
self.pubdata=DataDiscovery_EDM.DataDiscovery_EDM(datasetPath, dataTiers, dataTiers) |
303 |
> |
self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks) |
304 |
|
self.pubdata.fetchDBSInfo() |
305 |
|
|
306 |
< |
except DataDiscovery_EDM.NotExistingDatasetError, ex : |
306 |
> |
except DataDiscovery.NotExistingDatasetError, ex : |
307 |
|
msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage() |
308 |
|
raise CrabException(msg) |
309 |
< |
|
218 |
< |
except DataDiscovery_EDM.NoDataTierinProvenanceError, ex : |
309 |
> |
except DataDiscovery.NoDataTierinProvenanceError, ex : |
310 |
|
msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage() |
311 |
|
raise CrabException(msg) |
312 |
< |
except DataDiscovery_EDM.DataDiscoveryError, ex: |
313 |
< |
msg = 'ERROR ***: failed Data Discovery in DBS %s'%ex.getErrorMessage() |
312 |
> |
except DataDiscovery.DataDiscoveryError, ex: |
313 |
> |
msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage() |
314 |
|
raise CrabException(msg) |
315 |
|
|
316 |
< |
## get list of all required data in the form of dbs paths (dbs path = /dataset/datatier/owner) |
317 |
< |
## self.DBSPaths=self.pubdata.getDBSPaths() |
318 |
< |
common.logger.message("Required data are :"+self.datasetPath) |
228 |
< |
|
229 |
< |
filesbyblock=self.pubdata.getFiles() |
230 |
< |
self.AllInputFiles=filesbyblock.values() |
231 |
< |
self.files = self.AllInputFiles |
232 |
< |
|
233 |
< |
## TEMP |
234 |
< |
# self.filesTmp = filesbyblock.values() |
235 |
< |
# self.files = [] |
236 |
< |
# locPath='rfio:cmsbose2.bo.infn.it:/flatfiles/SE00/cms/fanfani/ProdTest/' |
237 |
< |
# locPath='' |
238 |
< |
# tmp = [] |
239 |
< |
# for file in self.filesTmp[0]: |
240 |
< |
# tmp.append(locPath+file) |
241 |
< |
# self.files.append(tmp) |
242 |
< |
## END TEMP |
316 |
> |
self.filesbyblock=self.pubdata.getFiles() |
317 |
> |
#print self.filesbyblock |
318 |
> |
self.conf['pubdata']=self.pubdata |
319 |
|
|
320 |
|
## get max number of events |
321 |
< |
#common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents()) |
246 |
< |
self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py |
247 |
< |
common.logger.message("\nThe number of available events is %s"%self.maxEvents) |
321 |
> |
self.maxEvents=self.pubdata.getMaxEvents() |
322 |
|
|
323 |
|
## Contact the DLS and build a list of sites hosting the fileblocks |
324 |
|
try: |
325 |
< |
dataloc=DataLocation_EDM.DataLocation_EDM(filesbyblock.keys(),cfg_params) |
325 |
> |
dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params) |
326 |
|
dataloc.fetchDLSInfo() |
327 |
< |
except DataLocation_EDM.DataLocationError , ex: |
327 |
> |
|
328 |
> |
except DataLocation.DataLocationError , ex: |
329 |
|
msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage() |
330 |
|
raise CrabException(msg) |
331 |
< |
|
332 |
< |
allsites=dataloc.getSites() |
333 |
< |
common.logger.debug(5,"sites are %s"%allsites) |
334 |
< |
sites=self.checkBlackList(allsites) |
335 |
< |
common.logger.debug(5,"sites are (after black list) %s"%sites) |
336 |
< |
sites=self.checkWhiteList(sites) |
337 |
< |
common.logger.debug(5,"sites are (after white list) %s"%sites) |
331 |
> |
|
332 |
> |
|
333 |
> |
unsorted_sites = dataloc.getSites() |
334 |
> |
sites = self.filesbyblock.fromkeys(self.filesbyblock,'') |
335 |
> |
for lfn in self.filesbyblock.keys(): |
336 |
> |
if unsorted_sites.has_key(lfn): |
337 |
> |
sites[lfn]=unsorted_sites[lfn] |
338 |
> |
else: |
339 |
> |
sites[lfn]=[] |
340 |
|
|
341 |
|
if len(sites)==0: |
342 |
< |
msg = 'No sites hosting all the needed data! Exiting... ' |
342 |
> |
msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath |
343 |
> |
msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n" |
344 |
> |
msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n" |
345 |
|
raise CrabException(msg) |
346 |
|
|
347 |
< |
common.logger.message("List of Sites hosting the data : "+str(sites)) |
348 |
< |
common.logger.debug(6, "List of Sites: "+str(sites)) |
349 |
< |
common.analisys_common_info['sites']=sites ## used in SchedulerEdg.py in createSchScript |
350 |
< |
return |
351 |
< |
|
352 |
< |
def jobSplitting(self): |
274 |
< |
""" |
275 |
< |
first implemntation for job splitting |
276 |
< |
""" |
277 |
< |
# print 'eventi totali '+str(self.maxEvents) |
278 |
< |
# print 'eventi totali richiesti dallo user '+str(self.total_number_of_events) |
279 |
< |
#print 'files per job '+str(self.filesPerJob) |
280 |
< |
common.logger.message('Required '+str(self.filesPerJob)+' files per job ') |
281 |
< |
common.logger.message('Required '+str(self.total_number_of_events)+' events in total ') |
282 |
< |
|
283 |
< |
## TODO: SL need to have (from DBS) a detailed list of how many events per each file |
284 |
< |
n_tot_files = (len(self.files[0])) |
285 |
< |
## SL: this is wrong if the files have different number of events |
286 |
< |
evPerFile = int(self.maxEvents)/n_tot_files |
287 |
< |
|
288 |
< |
common.logger.debug(5,'Events per File '+str(evPerFile)) |
289 |
< |
|
290 |
< |
## if asked to process all events, do it |
291 |
< |
if self.total_number_of_events == -1: |
292 |
< |
self.total_number_of_events=self.maxEvents |
293 |
< |
self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob) |
294 |
< |
common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for all available events '+str(self.total_number_of_events)+' events') |
295 |
< |
|
296 |
< |
else: |
297 |
< |
self.total_number_of_files = int(self.total_number_of_events/evPerFile) |
298 |
< |
## SL: if ask for less event than what is computed to be available on a |
299 |
< |
## file, process the first file anyhow. |
300 |
< |
if self.total_number_of_files == 0: |
301 |
< |
self.total_number_of_files = self.total_number_of_files + 1 |
302 |
< |
|
303 |
< |
common.logger.debug(5,'N files '+str(self.total_number_of_files)) |
304 |
< |
|
305 |
< |
check = 0 |
306 |
< |
|
307 |
< |
## Compute the number of jobs |
308 |
< |
#self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob) |
309 |
< |
self.total_number_of_jobs = int(self.total_number_of_files/self.filesPerJob) |
310 |
< |
common.logger.debug(5,'N jobs '+str(self.total_number_of_jobs)) |
311 |
< |
|
312 |
< |
## is there any remainder? |
313 |
< |
check = int(self.total_number_of_files) - (int(self.total_number_of_jobs)*self.filesPerJob) |
314 |
< |
|
315 |
< |
common.logger.debug(5,'Check '+str(check)) |
316 |
< |
|
317 |
< |
if check > 0: |
318 |
< |
self.total_number_of_jobs = self.total_number_of_jobs + 1 |
319 |
< |
common.logger.message('Warning: last job will be created with '+str(check)+' files') |
347 |
> |
allSites = [] |
348 |
> |
listSites = sites.values() |
349 |
> |
for listSite in listSites: |
350 |
> |
for oneSite in listSite: |
351 |
> |
allSites.append(oneSite) |
352 |
> |
[allSites.append(it) for it in allSites if not allSites.count(it)] |
353 |
|
|
321 |
– |
common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for a total of '+str((self.total_number_of_jobs-1)*self.filesPerJob*evPerFile + check*evPerFile)+' events') |
322 |
– |
pass |
354 |
|
|
355 |
< |
list_of_lists = [] |
356 |
< |
for i in xrange(0, int(n_tot_files), self.filesPerJob): |
326 |
< |
list_of_lists.append(self.files[0][i: i+self.filesPerJob]) |
355 |
> |
# screen output |
356 |
> |
common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n") |
357 |
|
|
358 |
< |
self.list_of_files = list_of_lists |
359 |
< |
|
360 |
< |
return |
358 |
> |
return sites |
359 |
> |
|
360 |
> |
|
361 |
> |
def split(self, jobParams,firstJobID): |
362 |
> |
|
363 |
> |
jobParams = self.dict['args'] |
364 |
> |
njobs = self.dict['njobs'] |
365 |
> |
self.jobDestination = self.dict['jobDestination'] |
366 |
> |
|
367 |
> |
if njobs==0: |
368 |
> |
raise CrabException("Ask to split "+str(njobs)+" jobs: aborting") |
369 |
|
|
332 |
– |
def split(self, jobParams): |
333 |
– |
|
334 |
– |
common.jobDB.load() |
335 |
– |
#### Fabio |
336 |
– |
njobs = self.total_number_of_jobs |
337 |
– |
filelist = self.list_of_files |
370 |
|
# create the empty structure |
371 |
|
for i in range(njobs): |
372 |
|
jobParams.append("") |
341 |
– |
|
342 |
– |
for job in range(njobs): |
343 |
– |
jobParams[job] = filelist[job] |
344 |
– |
common.jobDB.setArguments(job, jobParams[job]) |
373 |
|
|
374 |
< |
common.jobDB.save() |
374 |
> |
listID=[] |
375 |
> |
listField=[] |
376 |
> |
listDictions=[] |
377 |
> |
exist= os.path.exists(self.argsFile) |
378 |
> |
for id in range(njobs): |
379 |
> |
job = id + int(firstJobID) |
380 |
> |
listID.append(job+1) |
381 |
> |
job_ToSave ={} |
382 |
> |
concString = ' ' |
383 |
> |
argu='' |
384 |
> |
str_argu = str(job+1) |
385 |
> |
if len(jobParams[id]): |
386 |
> |
argu = {'JobID': job+1} |
387 |
> |
for i in range(len(jobParams[id])): |
388 |
> |
argu[self.dict['params'][i]]=jobParams[id][i] |
389 |
> |
# just for debug |
390 |
> |
str_argu += concString.join(jobParams[id]) |
391 |
> |
if argu != '': listDictions.append(argu) |
392 |
> |
job_ToSave['arguments']= str(job+1) |
393 |
> |
job_ToSave['dlsDestination']= self.jobDestination[id] |
394 |
> |
listField.append(job_ToSave) |
395 |
> |
from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap |
396 |
> |
cms_se = CmsSEMap() |
397 |
> |
msg="Job %s Arguments: %s\n"%(str(job+1),str_argu) |
398 |
> |
msg+="\t Destination: %s "%(str(self.jobDestination[id])) |
399 |
> |
SEDestination = [cms_se[dest] for dest in self.jobDestination[id]] |
400 |
> |
msg+="\t CMSDestination: %s "%(str(SEDestination)) |
401 |
> |
common.logger.log(10-1,msg) |
402 |
> |
# write xml |
403 |
> |
if len(listDictions): |
404 |
> |
if exist==False: self.CreateXML() |
405 |
> |
self.addEntry(listDictions) |
406 |
> |
self.addXMLfile() |
407 |
> |
common._db.updateJob_(listID,listField) |
408 |
> |
self.zipTarFile() |
409 |
|
return |
348 |
– |
|
349 |
– |
def getJobTypeArguments(self, nj, sched): |
350 |
– |
params = common.jobDB.arguments(nj) |
351 |
– |
#print params |
352 |
– |
parString = "\\{" |
353 |
– |
|
354 |
– |
for i in range(len(params) - 1): |
355 |
– |
parString += '\\\"' + params[i] + '\\\"\,' |
356 |
– |
|
357 |
– |
parString += '\\\"' + params[len(params) - 1] + '\\\"\\}' |
358 |
– |
return parString |
359 |
– |
|
360 |
– |
def numberOfJobs(self): |
361 |
– |
# Fabio |
410 |
|
|
411 |
< |
return self.total_number_of_jobs |
412 |
< |
|
411 |
> |
def addXMLfile(self): |
412 |
> |
|
413 |
> |
import tarfile |
414 |
> |
try: |
415 |
> |
tar = tarfile.open(self.tarNameWithPath, "a") |
416 |
> |
tar.add(self.argsFile, os.path.basename(self.argsFile)) |
417 |
> |
tar.close() |
418 |
> |
except IOError, exc: |
419 |
> |
msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath) |
420 |
> |
msg += str(exc) |
421 |
> |
raise CrabException(msg) |
422 |
> |
except tarfile.TarError, exc: |
423 |
> |
msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath) |
424 |
> |
msg += str(exc) |
425 |
> |
raise CrabException(msg) |
426 |
|
|
427 |
+ |
def CreateXML(self): |
428 |
+ |
""" |
429 |
+ |
""" |
430 |
+ |
result = IMProvNode( self.rootArgsFilename ) |
431 |
+ |
outfile = file( self.argsFile, 'w').write(str(result)) |
432 |
+ |
return |
433 |
|
|
434 |
< |
def checkBlackList(self, allSites): |
435 |
< |
if len(self.reCEBlackList)==0: return allSites |
436 |
< |
sites = [] |
370 |
< |
for site in allSites: |
371 |
< |
common.logger.debug(10,'Site '+site) |
372 |
< |
good=1 |
373 |
< |
for re in self.reCEBlackList: |
374 |
< |
if re.search(site): |
375 |
< |
common.logger.message('CE in black list, skipping site '+site) |
376 |
< |
good=0 |
377 |
< |
pass |
378 |
< |
if good: sites.append(site) |
379 |
< |
if len(sites) == 0: |
380 |
< |
common.logger.debug(3,"No sites found after BlackList") |
381 |
< |
return sites |
434 |
> |
def addEntry(self, listDictions): |
435 |
> |
""" |
436 |
> |
_addEntry_ |
437 |
|
|
438 |
< |
def checkWhiteList(self, allSites): |
438 |
> |
add an entry to the xml file |
439 |
> |
""" |
440 |
> |
from IMProv.IMProvLoader import loadIMProvFile |
441 |
> |
## load xml |
442 |
> |
improvDoc = loadIMProvFile(self.argsFile) |
443 |
> |
entrname= 'Job' |
444 |
> |
for dictions in listDictions: |
445 |
> |
report = IMProvNode(entrname , None, **dictions) |
446 |
> |
improvDoc.addNode(report) |
447 |
> |
outfile = file( self.argsFile, 'w').write(str(improvDoc)) |
448 |
> |
return |
449 |
|
|
450 |
< |
if len(self.reCEWhiteList)==0: return allSites |
451 |
< |
sites = [] |
387 |
< |
for site in allSites: |
388 |
< |
good=0 |
389 |
< |
for re in self.reCEWhiteList: |
390 |
< |
if re.search(site): |
391 |
< |
common.logger.debug(5,'CE in white list, adding site '+site) |
392 |
< |
good=1 |
393 |
< |
if not good: continue |
394 |
< |
sites.append(site) |
395 |
< |
if len(sites) == 0: |
396 |
< |
common.logger.message("No sites found after WhiteList\n") |
397 |
< |
else: |
398 |
< |
common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n") |
399 |
< |
return sites |
450 |
> |
def numberOfJobs(self): |
451 |
> |
return self.dict['njobs'] |
452 |
|
|
453 |
|
def getTarBall(self, exe): |
454 |
|
""" |
455 |
|
Return the TarBall with lib and exe |
456 |
|
""" |
457 |
< |
|
458 |
< |
# if it exist, just return it |
459 |
< |
self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name |
408 |
< |
if os.path.exists(self.tgzNameWithPath): |
409 |
< |
return self.tgzNameWithPath |
457 |
> |
self.tarNameWithPath = common.work_space.pathForTgz()+self.tar_name |
458 |
> |
if os.path.exists(self.tarNameWithPath): |
459 |
> |
return self.tarNameWithPath |
460 |
|
|
461 |
|
# Prepare a tar gzipped file with user binaries. |
462 |
|
self.buildTar_(exe) |
463 |
|
|
464 |
< |
return string.strip(self.tgzNameWithPath) |
464 |
> |
return string.strip(self.tarNameWithPath) |
465 |
|
|
466 |
|
def buildTar_(self, executable): |
467 |
|
|
468 |
|
# First of all declare the user Scram area |
469 |
|
swArea = self.scram.getSWArea_() |
420 |
– |
#print "swArea = ", swArea |
421 |
– |
swVersion = self.scram.getSWVersion() |
422 |
– |
#print "swVersion = ", swVersion |
470 |
|
swReleaseTop = self.scram.getReleaseTop_() |
471 |
< |
#print "swReleaseTop = ", swReleaseTop |
425 |
< |
|
471 |
> |
|
472 |
|
## check if working area is release top |
473 |
|
if swReleaseTop == '' or swArea == swReleaseTop: |
474 |
+ |
common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop) |
475 |
|
return |
476 |
|
|
477 |
< |
filesToBeTarred = [] |
478 |
< |
## First find the executable |
479 |
< |
if (self.executable != ''): |
480 |
< |
exeWithPath = self.scram.findFile_(executable) |
481 |
< |
# print exeWithPath |
482 |
< |
if ( not exeWithPath ): |
483 |
< |
raise CrabException('User executable '+executable+' not found') |
484 |
< |
|
485 |
< |
## then check if it's private or not |
486 |
< |
if exeWithPath.find(swReleaseTop) == -1: |
487 |
< |
# the exe is private, so we must ship |
488 |
< |
common.logger.debug(5,"Exe "+exeWithPath+" to be tarred") |
489 |
< |
path = swArea+'/' |
490 |
< |
exe = string.replace(exeWithPath, path,'') |
491 |
< |
filesToBeTarred.append(exe) |
477 |
> |
import tarfile |
478 |
> |
try: # create tar ball |
479 |
> |
#tar = tarfile.open(self.tgzNameWithPath, "w:gz") |
480 |
> |
tar = tarfile.open(self.tarNameWithPath, "w") |
481 |
> |
## First find the executable |
482 |
> |
if (self.executable != ''): |
483 |
> |
exeWithPath = self.scram.findFile_(executable) |
484 |
> |
if ( not exeWithPath ): |
485 |
> |
raise CrabException('User executable '+executable+' not found') |
486 |
> |
|
487 |
> |
## then check if it's private or not |
488 |
> |
if exeWithPath.find(swReleaseTop) == -1: |
489 |
> |
# the exe is private, so we must ship |
490 |
> |
common.logger.debug("Exe "+exeWithPath+" to be tarred") |
491 |
> |
path = swArea+'/' |
492 |
> |
# distinguish case when script is in user project area or given by full path somewhere else |
493 |
> |
if exeWithPath.find(path) >= 0 : |
494 |
> |
exe = string.replace(exeWithPath, path,'') |
495 |
> |
tar.add(path+exe,exe) |
496 |
> |
else : |
497 |
> |
tar.add(exeWithPath,os.path.basename(executable)) |
498 |
> |
pass |
499 |
> |
else: |
500 |
> |
# the exe is from release, we'll find it on WN |
501 |
> |
pass |
502 |
> |
|
503 |
> |
## Now get the libraries: only those in local working area |
504 |
> |
tar.dereference=True |
505 |
> |
libDir = 'lib' |
506 |
> |
lib = swArea+'/' +libDir |
507 |
> |
common.logger.debug("lib "+lib+" to be tarred") |
508 |
> |
if os.path.exists(lib): |
509 |
> |
tar.add(lib,libDir) |
510 |
> |
|
511 |
> |
## Now check if module dir is present |
512 |
> |
moduleDir = 'module' |
513 |
> |
module = swArea + '/' + moduleDir |
514 |
> |
if os.path.isdir(module): |
515 |
> |
tar.add(module,moduleDir) |
516 |
> |
tar.dereference=False |
517 |
> |
|
518 |
> |
## Now check if any data dir(s) is present |
519 |
> |
self.dataExist = False |
520 |
> |
todo_list = [(i, i) for i in os.listdir(swArea+"/src")] |
521 |
> |
while len(todo_list): |
522 |
> |
entry, name = todo_list.pop() |
523 |
> |
if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS': |
524 |
> |
continue |
525 |
> |
if os.path.isdir(swArea+"/src/"+entry): |
526 |
> |
entryPath = entry + '/' |
527 |
> |
todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)] |
528 |
> |
if name == 'data': |
529 |
> |
self.dataExist=True |
530 |
> |
common.logger.debug("data "+entry+" to be tarred") |
531 |
> |
tar.add(swArea+"/src/"+entry,"src/"+entry) |
532 |
> |
pass |
533 |
|
pass |
534 |
< |
else: |
535 |
< |
# the exe is from release, we'll find it on WN |
534 |
> |
|
535 |
> |
### CMSSW ParameterSet |
536 |
> |
if not self.pset is None: |
537 |
> |
cfg_file = common.work_space.jobDir()+self.configFilename() |
538 |
> |
tar.add(cfg_file,self.configFilename()) |
539 |
> |
|
540 |
> |
try: |
541 |
> |
crab_cfg_file = common.work_space.shareDir()+'/crab.cfg' |
542 |
> |
tar.add(crab_cfg_file,'crab.cfg') |
543 |
> |
except: |
544 |
|
pass |
545 |
< |
|
546 |
< |
## Now get the libraries: only those in local working area |
547 |
< |
libDir = 'lib' |
548 |
< |
lib = swArea+'/' +libDir |
549 |
< |
common.logger.debug(5,"lib "+lib+" to be tarred") |
550 |
< |
if os.path.exists(lib): |
551 |
< |
filesToBeTarred.append(libDir) |
552 |
< |
|
553 |
< |
## Now check if module dir is present |
554 |
< |
moduleDir = 'module' |
555 |
< |
if os.path.isdir(swArea+'/'+moduleDir): |
556 |
< |
filesToBeTarred.append(moduleDir) |
557 |
< |
|
558 |
< |
## Now check if the Data dir is present |
559 |
< |
dataDir = 'src/Data/' |
560 |
< |
if os.path.isdir(swArea+'/'+dataDir): |
561 |
< |
filesToBeTarred.append(dataDir) |
562 |
< |
|
563 |
< |
## Create the tar-ball |
564 |
< |
if len(filesToBeTarred)>0: |
565 |
< |
cwd = os.getcwd() |
566 |
< |
os.chdir(swArea) |
567 |
< |
tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' ' |
568 |
< |
for line in filesToBeTarred: |
569 |
< |
tarcmd = tarcmd + line + ' ' |
570 |
< |
cout = runCommand(tarcmd) |
571 |
< |
if not cout: |
572 |
< |
raise CrabException('Could not create tar-ball') |
573 |
< |
os.chdir(cwd) |
574 |
< |
else: |
575 |
< |
common.logger.debug(5,"No files to be to be tarred") |
576 |
< |
|
577 |
< |
return |
578 |
< |
|
579 |
< |
def wsSetupEnvironment(self, nj): |
545 |
> |
|
546 |
> |
## Add ProdCommon dir to tar |
547 |
> |
prodcommonDir = './' |
548 |
> |
prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/' |
549 |
> |
neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \ |
550 |
> |
'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \ |
551 |
> |
'WMCore/__init__.py','WMCore/Algorithms'] |
552 |
> |
for file in neededStuff: |
553 |
> |
tar.add(prodcommonPath+file,prodcommonDir+file) |
554 |
> |
|
555 |
> |
##### ML stuff |
556 |
> |
ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py'] |
557 |
> |
path=os.environ['CRABDIR'] + '/python/' |
558 |
> |
for file in ML_file_list: |
559 |
> |
tar.add(path+file,file) |
560 |
> |
|
561 |
> |
##### Utils |
562 |
> |
Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py'] |
563 |
> |
for file in Utils_file_list: |
564 |
> |
tar.add(path+file,file) |
565 |
> |
|
566 |
> |
##### AdditionalFiles |
567 |
> |
tar.dereference=True |
568 |
> |
for file in self.additional_inbox_files: |
569 |
> |
tar.add(file,string.split(file,'/')[-1]) |
570 |
> |
tar.dereference=False |
571 |
> |
common.logger.log(10-1,"Files in "+self.tarNameWithPath+" : "+str(tar.getnames())) |
572 |
> |
|
573 |
> |
tar.close() |
574 |
> |
except IOError, exc: |
575 |
> |
msg = 'Could not create tar-ball %s \n'%self.tarNameWithPath |
576 |
> |
msg += str(exc) |
577 |
> |
raise CrabException(msg) |
578 |
> |
except tarfile.TarError, exc: |
579 |
> |
msg = 'Could not create tar-ball %s \n'%self.tarNameWithPath |
580 |
> |
msg += str(exc) |
581 |
> |
raise CrabException(msg) |
582 |
> |
|
583 |
> |
def zipTarFile(self): |
584 |
> |
|
585 |
> |
cmd = "gzip -c %s > %s "%(self.tarNameWithPath,self.tgzNameWithPath) |
586 |
> |
res=runCommand(cmd) |
587 |
> |
|
588 |
> |
tarballinfo = os.stat(self.tgzNameWithPath) |
589 |
> |
if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) : |
590 |
> |
msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \ |
591 |
> |
+'MB input sandbox limit \n' |
592 |
> |
msg += ' and not supported by the direct GRID submission system.\n' |
593 |
> |
msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n' |
594 |
> |
msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServer#CRABSERVER_for_Users' |
595 |
> |
raise CrabException(msg) |
596 |
> |
|
597 |
> |
## create tar-ball with ML stuff |
598 |
> |
|
599 |
> |
def wsSetupEnvironment(self, nj=0): |
600 |
|
""" |
601 |
|
Returns part of a job script which prepares |
602 |
|
the execution environment for the job 'nj'. |
603 |
|
""" |
604 |
+ |
# FUTURE: Drop support for .cfg when possible |
605 |
+ |
if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3): |
606 |
+ |
psetName = 'pset.py' |
607 |
+ |
else: |
608 |
+ |
psetName = 'pset.cfg' |
609 |
|
# Prepare JobType-independent part |
610 |
< |
txt = '' |
611 |
< |
|
612 |
< |
## OLI_Daniele at this level middleware already known |
492 |
< |
|
493 |
< |
txt += 'if [ $middleware == LCG ]; then \n' |
610 |
> |
txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n' |
611 |
> |
txt += 'echo ">>> setup environment"\n' |
612 |
> |
txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n' |
613 |
|
txt += self.wsSetupCMSLCGEnvironment_() |
614 |
|
txt += 'elif [ $middleware == OSG ]; then\n' |
615 |
< |
txt += ' time=`date -u +"%s"`\n' |
616 |
< |
txt += ' WORKING_DIR=$OSG_WN_TMP/cms_$time\n' |
617 |
< |
txt += ' echo "Creating working directory: $WORKING_DIR"\n' |
618 |
< |
txt += ' /bin/mkdir -p $WORKING_DIR\n' |
619 |
< |
txt += ' if [ ! -d $WORKING_DIR ] ;then\n' |
501 |
< |
txt += ' echo "OSG WORKING DIR ==> $WORKING_DIR could not be created on on WN `hostname`"\n' |
502 |
< |
|
503 |
< |
txt += ' echo "JOB_EXIT_STATUS = 1"\n' |
504 |
< |
txt += ' exit 1\n' |
615 |
> |
txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n' |
616 |
> |
txt += ' if [ ! $? == 0 ] ;then\n' |
617 |
> |
txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n' |
618 |
> |
txt += ' job_exit_code=10016\n' |
619 |
> |
txt += ' func_exit\n' |
620 |
|
txt += ' fi\n' |
621 |
+ |
txt += ' echo ">>> Created working directory: $WORKING_DIR"\n' |
622 |
|
txt += '\n' |
623 |
|
txt += ' echo "Change to working directory: $WORKING_DIR"\n' |
624 |
|
txt += ' cd $WORKING_DIR\n' |
625 |
< |
txt += self.wsSetupCMSOSGEnvironment_() |
625 |
> |
txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n' |
626 |
> |
txt += self.wsSetupCMSOSGEnvironment_() |
627 |
> |
#Setup SGE Environment |
628 |
> |
txt += 'elif [ $middleware == SGE ]; then\n' |
629 |
> |
txt += self.wsSetupCMSLCGEnvironment_() |
630 |
> |
|
631 |
> |
txt += 'elif [ $middleware == ARC ]; then\n' |
632 |
> |
txt += self.wsSetupCMSLCGEnvironment_() |
633 |
> |
|
634 |
|
txt += 'fi\n' |
635 |
|
|
636 |
|
# Prepare JobType-specific part |
637 |
|
scram = self.scram.commandName() |
638 |
|
txt += '\n\n' |
639 |
< |
txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n' |
639 |
> |
txt += 'echo ">>> specific cmssw setup environment:"\n' |
640 |
> |
txt += 'echo "CMSSW_VERSION = '+self.version+'"\n' |
641 |
|
txt += scram+' project CMSSW '+self.version+'\n' |
642 |
|
txt += 'status=$?\n' |
643 |
|
txt += 'if [ $status != 0 ] ; then\n' |
644 |
< |
txt += ' echo "SET_EXE_ENV 1 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n' |
645 |
< |
txt += ' echo "JOB_EXIT_STATUS = 10034"\n' |
646 |
< |
txt += ' echo "SanityCheckCode = 10034" | tee -a $RUNTIME_AREA/$repo\n' |
522 |
< |
txt += ' dumpStatus $RUNTIME_AREA/$repo\n' |
523 |
< |
## OLI_Daniele |
524 |
< |
txt += ' if [ $middleware == OSG ]; then \n' |
525 |
< |
txt += ' echo "Remove working directory: $WORKING_DIR"\n' |
526 |
< |
txt += ' cd $RUNTIME_AREA\n' |
527 |
< |
txt += ' /bin/rm -rf $WORKING_DIR\n' |
528 |
< |
txt += ' if [ -d $WORKING_DIR ] ;then\n' |
529 |
< |
txt += ' echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n' |
530 |
< |
txt += ' fi\n' |
531 |
< |
txt += ' fi \n' |
532 |
< |
txt += ' exit 1 \n' |
644 |
> |
txt += ' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n' |
645 |
> |
txt += ' job_exit_code=10034\n' |
646 |
> |
txt += ' func_exit\n' |
647 |
|
txt += 'fi \n' |
534 |
– |
txt += 'echo "CMSSW_VERSION = '+self.version+'"\n' |
648 |
|
txt += 'cd '+self.version+'\n' |
649 |
< |
### needed grep for bug in scramv1 ### |
649 |
> |
txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n' |
650 |
> |
txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n' |
651 |
|
txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n' |
652 |
< |
|
652 |
> |
txt += 'if [ $? != 0 ] ; then\n' |
653 |
> |
txt += ' echo "ERROR ==> Problem with the command: "\n' |
654 |
> |
txt += ' echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n' |
655 |
> |
txt += ' job_exit_code=10034\n' |
656 |
> |
txt += ' func_exit\n' |
657 |
> |
txt += 'fi \n' |
658 |
|
# Handle the arguments: |
659 |
|
txt += "\n" |
660 |
< |
txt += "## ARGUMNETS: $1 Job Number\n" |
542 |
< |
# txt += "## ARGUMNETS: $2 First Event for this job\n" |
543 |
< |
# txt += "## ARGUMNETS: $3 Max Event for this job\n" |
660 |
> |
txt += "## number of arguments (first argument always jobnumber)\n" |
661 |
|
txt += "\n" |
662 |
< |
txt += "narg=$#\n" |
546 |
< |
txt += "if [ $narg -lt 2 ]\n" |
662 |
> |
txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n" |
663 |
|
txt += "then\n" |
664 |
< |
txt += " echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n" |
665 |
< |
txt += ' echo "JOB_EXIT_STATUS = 50113"\n' |
666 |
< |
txt += ' echo "SanityCheckCode = 50113" | tee -a $RUNTIME_AREA/$repo\n' |
551 |
< |
txt += ' dumpStatus $RUNTIME_AREA/$repo\n' |
552 |
< |
## OLI_Daniele |
553 |
< |
txt += ' if [ $middleware == OSG ]; then \n' |
554 |
< |
txt += ' echo "Remove working directory: $WORKING_DIR"\n' |
555 |
< |
txt += ' cd $RUNTIME_AREA\n' |
556 |
< |
txt += ' /bin/rm -rf $WORKING_DIR\n' |
557 |
< |
txt += ' if [ -d $WORKING_DIR ] ;then\n' |
558 |
< |
txt += ' echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n' |
559 |
< |
txt += ' fi\n' |
560 |
< |
txt += ' fi \n' |
561 |
< |
txt += " exit 1\n" |
664 |
> |
txt += " echo 'ERROR ==> Too few arguments' +$nargs+ \n" |
665 |
> |
txt += ' job_exit_code=50113\n' |
666 |
> |
txt += " func_exit\n" |
667 |
|
txt += "fi\n" |
668 |
|
txt += "\n" |
564 |
– |
txt += "NJob=$1\n" |
565 |
– |
txt += "InputFiles=$2\n" |
566 |
– |
txt += "echo \"<$InputFiles>\"\n" |
567 |
– |
# txt += "Args = ` cat $2 | sed -e \'s/\\\\//g\' -e \'s/\"/\\x27/g\' `" |
568 |
– |
|
569 |
– |
### OLI_DANIELE |
570 |
– |
txt += 'if [ $middleware == LCG ]; then \n' |
571 |
– |
txt += ' echo "MonitorJobID=`echo ${NJob}_$EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n' |
572 |
– |
txt += ' echo "SyncGridJobId=`echo $EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n' |
573 |
– |
txt += ' echo "SyncCE=`edg-brokerinfo getCE`" | tee -a $RUNTIME_AREA/$repo\n' |
574 |
– |
txt += 'elif [ $middleware == OSG ]; then\n' |
575 |
– |
|
576 |
– |
# OLI: added monitoring for dashbord, use hash of crab.cfg |
577 |
– |
if common.scheduler.boss_scheduler_name == 'condor_g': |
578 |
– |
# create hash of cfg file |
579 |
– |
hash = makeCksum(common.work_space.cfgFileName()) |
580 |
– |
txt += ' echo "MonitorJobID=`echo ${NJob}_'+hash+'_$GLOBUS_GRAM_JOB_CONTACT`" | tee -a $RUNTIME_AREA/$repo\n' |
581 |
– |
txt += ' echo "SyncGridJobId=`echo $GLOBUS_GRAM_JOB_CONTACT`" | tee -a $RUNTIME_AREA/$repo\n' |
582 |
– |
txt += ' echo "SyncCE=`echo $hostname`" | tee -a $RUNTIME_AREA/$repo\n' |
583 |
– |
else : |
584 |
– |
txt += ' echo "MonitorJobID=`echo ${NJob}_$EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n' |
585 |
– |
txt += ' echo "SyncGridJobId=`echo $EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n' |
586 |
– |
txt += ' echo "SyncCE=`$EDG_WL_LOG_DESTINATION`" | tee -a $RUNTIME_AREA/$repo\n' |
587 |
– |
|
588 |
– |
txt += 'fi\n' |
589 |
– |
txt += 'dumpStatus $RUNTIME_AREA/$repo\n' |
669 |
|
|
670 |
|
# Prepare job-specific part |
671 |
|
job = common.job_list[nj] |
672 |
< |
pset = os.path.basename(job.configFilename()) |
673 |
< |
txt += '\n' |
674 |
< |
#txt += 'echo sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' \n' |
675 |
< |
txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset.cfg\n' |
676 |
< |
#txt += 'sed "s#{\'INPUT\'}#${InputFiles}#" $RUNTIME_AREA/'+pset+' > pset1.cfg\n' |
672 |
> |
if (self.datasetPath): |
673 |
> |
self.primaryDataset = self.datasetPath.split("/")[1] |
674 |
> |
DataTier = self.datasetPath.split("/")[2] |
675 |
> |
txt += '\n' |
676 |
> |
txt += 'DatasetPath='+self.datasetPath+'\n' |
677 |
|
|
678 |
< |
if len(self.additional_inbox_files) > 0: |
679 |
< |
for file in self.additional_inbox_files: |
680 |
< |
txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n' |
602 |
< |
txt += ' cp $RUNTIME_AREA/'+file+' .\n' |
603 |
< |
txt += ' chmod +x '+file+'\n' |
604 |
< |
txt += 'fi\n' |
605 |
< |
pass |
678 |
> |
txt += 'PrimaryDataset='+self.primaryDataset +'\n' |
679 |
> |
txt += 'DataTier='+DataTier+'\n' |
680 |
> |
txt += 'ApplicationFamily=cmsRun\n' |
681 |
|
|
682 |
< |
txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n' |
682 |
> |
else: |
683 |
> |
self.primaryDataset = 'null' |
684 |
> |
txt += 'DatasetPath=MCDataTier\n' |
685 |
> |
txt += 'PrimaryDataset=null\n' |
686 |
> |
txt += 'DataTier=null\n' |
687 |
> |
txt += 'ApplicationFamily=MCDataTier\n' |
688 |
> |
if self.pset != None: |
689 |
> |
pset = os.path.basename(job.configFilename()) |
690 |
> |
txt += '\n' |
691 |
> |
txt += 'cp $RUNTIME_AREA/'+pset+' .\n' |
692 |
> |
|
693 |
> |
txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n' |
694 |
> |
txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n' |
695 |
> |
txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n' |
696 |
> |
txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n' |
697 |
> |
|
698 |
> |
txt += 'mv -f ' + pset + ' ' + psetName + '\n' |
699 |
> |
else: |
700 |
> |
txt += '\n' |
701 |
> |
txt += 'export AdditionalArgs=%s\n'%(self.AdditionalArgs) |
702 |
|
|
609 |
– |
txt += '\n' |
610 |
– |
txt += 'echo "***** cat pset.cfg *********"\n' |
611 |
– |
txt += 'cat pset.cfg\n' |
612 |
– |
txt += 'echo "****** end pset.cfg ********"\n' |
613 |
– |
txt += '\n' |
614 |
– |
# txt += 'echo "***** cat pset1.cfg *********"\n' |
615 |
– |
# txt += 'cat pset1.cfg\n' |
616 |
– |
# txt += 'echo "****** end pset1.cfg ********"\n' |
703 |
|
return txt |
704 |
|
|
705 |
< |
def wsBuildExe(self, nj): |
705 |
> |
def wsUntarSoftware(self, nj=0): |
706 |
|
""" |
707 |
|
Put in the script the commands to build an executable |
708 |
|
or a library. |
709 |
|
""" |
710 |
|
|
711 |
< |
txt = "" |
711 |
> |
txt = '\n#Written by cms_cmssw::wsUntarSoftware\n' |
712 |
|
|
713 |
|
if os.path.isfile(self.tgzNameWithPath): |
714 |
< |
txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n' |
715 |
< |
txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n' |
714 |
> |
txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n' |
715 |
> |
txt += 'tar zxvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n' |
716 |
> |
if self.debug_wrapper==1 : |
717 |
> |
txt += 'ls -Al \n' |
718 |
|
txt += 'untar_status=$? \n' |
719 |
|
txt += 'if [ $untar_status -ne 0 ]; then \n' |
720 |
< |
txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n' |
721 |
< |
txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n' |
722 |
< |
txt += ' echo "SanityCheckCode = $untar_status" | tee -a $repo\n' |
635 |
< |
txt += ' if [ $middleware == OSG ]; then \n' |
636 |
< |
txt += ' echo "Remove working directory: $WORKING_DIR"\n' |
637 |
< |
txt += ' cd $RUNTIME_AREA\n' |
638 |
< |
txt += ' /bin/rm -rf $WORKING_DIR\n' |
639 |
< |
txt += ' if [ -d $WORKING_DIR ] ;then\n' |
640 |
< |
txt += ' echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n' |
641 |
< |
txt += ' fi\n' |
642 |
< |
txt += ' fi \n' |
643 |
< |
txt += ' \n' |
644 |
< |
txt += ' exit $untar_status \n' |
720 |
> |
txt += ' echo "ERROR ==> Untarring .tgz file failed"\n' |
721 |
> |
txt += ' job_exit_code=$untar_status\n' |
722 |
> |
txt += ' func_exit\n' |
723 |
|
txt += 'else \n' |
724 |
|
txt += ' echo "Successful untar" \n' |
725 |
|
txt += 'fi \n' |
726 |
+ |
txt += '\n' |
727 |
+ |
txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n' |
728 |
+ |
txt += 'if [ -z "$PYTHONPATH" ]; then\n' |
729 |
+ |
txt += ' export PYTHONPATH=$RUNTIME_AREA/\n' |
730 |
+ |
txt += 'else\n' |
731 |
+ |
txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n' |
732 |
+ |
txt += 'echo "PYTHONPATH=$PYTHONPATH"\n' |
733 |
+ |
txt += 'fi\n' |
734 |
+ |
txt += '\n' |
735 |
+ |
|
736 |
|
pass |
737 |
< |
|
737 |
> |
|
738 |
|
return txt |
739 |
|
|
740 |
< |
def modifySteeringCards(self, nj): |
740 |
> |
def wsBuildExe(self, nj=0): |
741 |
|
""" |
742 |
< |
modify the card provided by the user, |
743 |
< |
writing a new card into share dir |
742 |
> |
Put in the script the commands to build an executable |
743 |
> |
or a library. |
744 |
|
""" |
745 |
< |
|
745 |
> |
|
746 |
> |
txt = '\n#Written by cms_cmssw::wsBuildExe\n' |
747 |
> |
txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n' |
748 |
> |
|
749 |
> |
txt += 'rm -r lib/ module/ \n' |
750 |
> |
txt += 'mv $RUNTIME_AREA/lib/ . \n' |
751 |
> |
txt += 'mv $RUNTIME_AREA/module/ . \n' |
752 |
> |
if self.dataExist == True: |
753 |
> |
txt += 'rm -r src/ \n' |
754 |
> |
txt += 'mv $RUNTIME_AREA/src/ . \n' |
755 |
> |
if len(self.additional_inbox_files)>0: |
756 |
> |
for file in self.additional_inbox_files: |
757 |
> |
txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n' |
758 |
> |
# txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n' |
759 |
> |
# txt += 'mv $RUNTIME_AREA/IMProv/ . \n' |
760 |
> |
|
761 |
> |
txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n' |
762 |
> |
txt += 'if [ -z "$PYTHONPATH" ]; then\n' |
763 |
> |
txt += ' export PYTHONPATH=$RUNTIME_AREA/\n' |
764 |
> |
txt += 'else\n' |
765 |
> |
txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n' |
766 |
> |
txt += 'echo "PYTHONPATH=$PYTHONPATH"\n' |
767 |
> |
txt += 'fi\n' |
768 |
> |
txt += '\n' |
769 |
> |
|
770 |
> |
if self.pset != None: |
771 |
> |
# FUTURE: Drop support for .cfg when possible |
772 |
> |
if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3): |
773 |
> |
psetName = 'pset.py' |
774 |
> |
else: |
775 |
> |
psetName = 'pset.cfg' |
776 |
> |
# FUTURE: Can simply for 2_1_x and higher |
777 |
> |
txt += '\n' |
778 |
> |
if self.debug_wrapper == 1: |
779 |
> |
txt += 'echo "***** cat ' + psetName + ' *********"\n' |
780 |
> |
txt += 'cat ' + psetName + '\n' |
781 |
> |
txt += 'echo "****** end ' + psetName + ' ********"\n' |
782 |
> |
txt += '\n' |
783 |
> |
txt += 'echo "***********************" \n' |
784 |
> |
txt += 'which edmConfigHash \n' |
785 |
> |
txt += 'echo "***********************" \n' |
786 |
> |
if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3): |
787 |
> |
txt += 'edmConfigHash ' + psetName + ' \n' |
788 |
> |
txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n' |
789 |
> |
else: |
790 |
> |
txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n' |
791 |
> |
txt += 'echo "PSETHASH = $PSETHASH" \n' |
792 |
> |
#### FEDE temporary fix for noEdm files ##### |
793 |
> |
txt += 'if [ -z "$PSETHASH" ]; then \n' |
794 |
> |
txt += ' export PSETHASH=null\n' |
795 |
> |
txt += 'fi \n' |
796 |
> |
############################################# |
797 |
> |
txt += '\n' |
798 |
> |
return txt |
799 |
> |
|
800 |
> |
|
801 |
|
def executableName(self): |
802 |
< |
return self.executable |
802 |
> |
if self.scriptExe: |
803 |
> |
return "sh " |
804 |
> |
else: |
805 |
> |
return self.executable |
806 |
|
|
807 |
|
def executableArgs(self): |
808 |
< |
return " -p pset.cfg" |
808 |
> |
# FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions |
809 |
> |
if self.scriptExe: |
810 |
> |
return self.scriptExe + " $NJob $AdditionalArgs" |
811 |
> |
else: |
812 |
> |
ex_args = "" |
813 |
> |
ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml" |
814 |
> |
# Type of config file depends on CMSSW version |
815 |
> |
if self.CMSSW_major >= 2 : |
816 |
> |
ex_args += " -p pset.py" |
817 |
> |
else: |
818 |
> |
ex_args += " -p pset.cfg" |
819 |
> |
return ex_args |
820 |
|
|
821 |
|
def inputSandbox(self, nj): |
822 |
|
""" |
823 |
|
Returns a list of filenames to be put in JDL input sandbox. |
824 |
|
""" |
825 |
|
inp_box = [] |
669 |
– |
# dict added to delete duplicate from input sandbox file list |
670 |
– |
seen = {} |
671 |
– |
## code |
826 |
|
if os.path.isfile(self.tgzNameWithPath): |
827 |
|
inp_box.append(self.tgzNameWithPath) |
828 |
< |
## config |
675 |
< |
inp_box.append(common.job_list[nj].configFilename()) |
676 |
< |
## additional input files |
677 |
< |
#for file in self.additional_inbox_files: |
678 |
< |
# inp_box.append(common.work_space.cwdDir()+file) |
828 |
> |
inp_box.append(common.work_space.jobDir() + self.scriptName) |
829 |
|
return inp_box |
830 |
|
|
831 |
|
def outputSandbox(self, nj): |
834 |
|
""" |
835 |
|
out_box = [] |
836 |
|
|
687 |
– |
stdout=common.job_list[nj].stdout() |
688 |
– |
stderr=common.job_list[nj].stderr() |
689 |
– |
|
837 |
|
## User Declared output files |
838 |
< |
for out in self.output_file: |
839 |
< |
n_out = nj + 1 |
840 |
< |
out_box.append(self.numberFile_(out,str(n_out))) |
838 |
> |
for out in (self.output_file+self.output_file_sandbox): |
839 |
> |
n_out = nj + 1 |
840 |
> |
out_box.append(numberFile(out,str(n_out))) |
841 |
|
return out_box |
695 |
– |
return [] |
842 |
|
|
697 |
– |
def prepareSteeringCards(self): |
698 |
– |
""" |
699 |
– |
Make initial modifications of the user's steering card file. |
700 |
– |
""" |
701 |
– |
return |
843 |
|
|
844 |
|
def wsRenameOutput(self, nj): |
845 |
|
""" |
846 |
|
Returns part of a job script which renames the produced files. |
847 |
|
""" |
848 |
|
|
849 |
< |
txt = '\n' |
850 |
< |
file_list = '' |
851 |
< |
check = len(self.output_file) |
852 |
< |
i = 0 |
853 |
< |
for fileWithSuffix in self.output_file: |
854 |
< |
i= i + 1 |
855 |
< |
output_file_num = self.numberFile_(fileWithSuffix, '$NJob') |
856 |
< |
file_list=file_list+output_file_num+'' |
857 |
< |
txt += '\n' |
717 |
< |
txt += 'ls \n' |
849 |
> |
txt = '\n#Written by cms_cmssw::wsRenameOutput\n' |
850 |
> |
txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n' |
851 |
> |
txt += 'echo ">>> current directory content:"\n' |
852 |
> |
if self.debug_wrapper==1: |
853 |
> |
txt += 'ls -Al\n' |
854 |
> |
txt += '\n' |
855 |
> |
|
856 |
> |
for fileWithSuffix in (self.output_file): |
857 |
> |
output_file_num = numberFile(fileWithSuffix, '$NJob') |
858 |
|
txt += '\n' |
859 |
< |
txt += 'ls '+fileWithSuffix+'\n' |
860 |
< |
txt += 'exe_result=$?\n' |
861 |
< |
txt += 'if [ $exe_result -ne 0 ] ; then\n' |
862 |
< |
txt += ' echo "ERROR: No output file to manage"\n' |
863 |
< |
### OLI_DANIELE |
864 |
< |
txt += ' if [ $middleware == OSG ]; then \n' |
865 |
< |
txt += ' echo "prepare dummy output file"\n' |
866 |
< |
txt += ' cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n' |
727 |
< |
txt += ' fi \n' |
859 |
> |
txt += '# check output file\n' |
860 |
> |
txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n' |
861 |
> |
if (self.copy_data == 1): # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA |
862 |
> |
txt += ' mv '+fileWithSuffix+' '+output_file_num+'\n' |
863 |
> |
txt += ' ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n' |
864 |
> |
else: |
865 |
> |
txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n' |
866 |
> |
txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n' |
867 |
|
txt += 'else\n' |
868 |
< |
txt += ' cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n' |
868 |
> |
txt += ' job_exit_code=60302\n' |
869 |
> |
txt += ' echo "WARNING: Output file '+fileWithSuffix+' not found"\n' |
870 |
> |
if common.scheduler.name().upper() == 'CONDOR_G': |
871 |
> |
txt += ' if [ $middleware == OSG ]; then \n' |
872 |
> |
txt += ' echo "prepare dummy output file"\n' |
873 |
> |
txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n' |
874 |
> |
txt += ' fi \n' |
875 |
|
txt += 'fi\n' |
876 |
< |
if i == check: |
877 |
< |
txt += 'cd $RUNTIME_AREA\n' |
878 |
< |
pass |
879 |
< |
pass |
880 |
< |
|
881 |
< |
file_list=file_list[:-1] |
882 |
< |
txt += 'file_list="'+file_list+'"\n' |
883 |
< |
### OLI_DANIELE |
884 |
< |
txt += 'if [ $middleware == OSG ]; then\n' |
885 |
< |
txt += ' cd $RUNTIME_AREA\n' |
741 |
< |
txt += ' echo "Remove working directory: $WORKING_DIR"\n' |
742 |
< |
txt += ' /bin/rm -rf $WORKING_DIR\n' |
743 |
< |
txt += ' if [ -d $WORKING_DIR ] ;then\n' |
744 |
< |
txt += ' echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n' |
745 |
< |
txt += ' fi\n' |
746 |
< |
txt += 'fi\n' |
876 |
> |
file_list = [] |
877 |
> |
for fileWithSuffix in (self.output_file): |
878 |
> |
file_list.append(numberFile('$SOFTWARE_DIR/'+fileWithSuffix, '$NJob')) |
879 |
> |
|
880 |
> |
txt += 'file_list="'+string.join(file_list,',')+'"\n' |
881 |
> |
txt += '\n' |
882 |
> |
txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n' |
883 |
> |
txt += 'echo ">>> current directory content:"\n' |
884 |
> |
if self.debug_wrapper==1: |
885 |
> |
txt += 'ls -Al\n' |
886 |
|
txt += '\n' |
887 |
+ |
txt += 'cd $RUNTIME_AREA\n' |
888 |
+ |
txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n' |
889 |
|
return txt |
890 |
|
|
891 |
< |
def numberFile_(self, file, txt): |
751 |
< |
""" |
752 |
< |
append _'txt' before last extension of a file |
753 |
< |
""" |
754 |
< |
p = string.split(file,".") |
755 |
< |
# take away last extension |
756 |
< |
name = p[0] |
757 |
< |
for x in p[1:-1]: |
758 |
< |
name=name+"."+x |
759 |
< |
# add "_txt" |
760 |
< |
if len(p)>1: |
761 |
< |
ext = p[len(p)-1] |
762 |
< |
#result = name + '_' + str(txt) + "." + ext |
763 |
< |
result = name + '_' + txt + "." + ext |
764 |
< |
else: |
765 |
< |
#result = name + '_' + str(txt) |
766 |
< |
result = name + '_' + txt |
767 |
< |
|
768 |
< |
return result |
769 |
< |
|
770 |
< |
def getRequirements(self): |
891 |
> |
def getRequirements(self, nj=[]): |
892 |
|
""" |
893 |
< |
return job requirements to add to jdl files |
893 |
> |
return job requirements to add to jdl files |
894 |
|
""" |
895 |
|
req = '' |
896 |
< |
if common.analisys_common_info['sites']: |
897 |
< |
if common.analisys_common_info['sw_version']: |
898 |
< |
req='Member("VO-cms-' + \ |
899 |
< |
common.analisys_common_info['sw_version'] + \ |
900 |
< |
'", other.GlueHostApplicationSoftwareRunTimeEnvironment)' |
901 |
< |
if len(common.analisys_common_info['sites'])>0: |
902 |
< |
req = req + ' && (' |
903 |
< |
for i in range(len(common.analisys_common_info['sites'])): |
904 |
< |
req = req + 'other.GlueCEInfoHostName == "' \ |
905 |
< |
+ common.analisys_common_info['sites'][i] + '"' |
906 |
< |
if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ): |
907 |
< |
req = req + ' || ' |
908 |
< |
req = req + ')' |
788 |
< |
#print "req = ", req |
896 |
> |
if self.version: |
897 |
> |
req='Member("VO-cms-' + \ |
898 |
> |
self.version + \ |
899 |
> |
'", other.GlueHostApplicationSoftwareRunTimeEnvironment)' |
900 |
> |
if self.executable_arch: |
901 |
> |
req+=' && Member("VO-cms-' + \ |
902 |
> |
self.executable_arch + \ |
903 |
> |
'", other.GlueHostApplicationSoftwareRunTimeEnvironment)' |
904 |
> |
|
905 |
> |
req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)' |
906 |
> |
if ( common.scheduler.name() == "glitecoll" ) or ( common.scheduler.name() == "glite"): |
907 |
> |
req += ' && other.GlueCEStateStatus == "Production" ' |
908 |
> |
|
909 |
|
return req |
910 |
|
|
911 |
|
def configFilename(self): |
912 |
|
""" return the config filename """ |
913 |
< |
return self.name()+'.cfg' |
913 |
> |
# FUTURE: Can remove cfg mode for CMSSW >= 2_1_x |
914 |
> |
if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3): |
915 |
> |
return self.name()+'.py' |
916 |
> |
else: |
917 |
> |
return self.name()+'.cfg' |
918 |
|
|
795 |
– |
### OLI_DANIELE |
919 |
|
def wsSetupCMSOSGEnvironment_(self): |
920 |
|
""" |
921 |
|
Returns part of a job script which is prepares |
922 |
|
the execution environment and which is common for all CMS jobs. |
923 |
|
""" |
924 |
< |
txt = '\n' |
925 |
< |
txt += ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n' |
926 |
< |
txt += ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n' |
927 |
< |
txt += ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n' |
928 |
< |
txt += ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n' |
929 |
< |
txt += ' elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n' |
930 |
< |
txt += ' # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n' |
931 |
< |
txt += ' source $OSG_APP/cmssoft/cmsset_default.sh '+self.version+'\n' |
932 |
< |
txt += ' else\n' |
933 |
< |
txt += ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n' |
934 |
< |
txt += ' echo "JOB_EXIT_STATUS = 10020"\n' |
935 |
< |
txt += ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n' |
936 |
< |
txt += ' dumpStatus $RUNTIME_AREA/$repo\n' |
814 |
< |
txt += ' exit\n' |
815 |
< |
txt += '\n' |
816 |
< |
txt += ' echo "Remove working directory: $WORKING_DIR"\n' |
817 |
< |
txt += ' cd $RUNTIME_AREA\n' |
818 |
< |
txt += ' /bin/rm -rf $WORKING_DIR\n' |
819 |
< |
txt += ' if [ -d $WORKING_DIR ] ;then\n' |
820 |
< |
txt += ' echo "OSG WORKING DIR ==> $WORKING_DIR could not be deleted on on WN `hostname`"\n' |
821 |
< |
txt += ' fi\n' |
822 |
< |
txt += '\n' |
823 |
< |
txt += ' exit\n' |
824 |
< |
txt += ' fi\n' |
924 |
> |
txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n' |
925 |
> |
txt += ' echo ">>> setup CMS OSG environment:"\n' |
926 |
> |
txt += ' echo "set SCRAM ARCH to ' + self.executable_arch + '"\n' |
927 |
> |
txt += ' export SCRAM_ARCH='+self.executable_arch+'\n' |
928 |
> |
txt += ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n' |
929 |
> |
txt += ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n' |
930 |
> |
txt += ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n' |
931 |
> |
txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n' |
932 |
> |
txt += ' else\n' |
933 |
> |
txt += ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n' |
934 |
> |
txt += ' job_exit_code=10020\n' |
935 |
> |
txt += ' func_exit\n' |
936 |
> |
txt += ' fi\n' |
937 |
|
txt += '\n' |
938 |
< |
txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n' |
939 |
< |
txt += ' echo " END SETUP CMS OSG ENVIRONMENT "\n' |
938 |
> |
txt += ' echo "==> setup cms environment ok"\n' |
939 |
> |
txt += ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n' |
940 |
|
|
941 |
|
return txt |
942 |
< |
|
831 |
< |
### OLI_DANIELE |
942 |
> |
|
943 |
|
def wsSetupCMSLCGEnvironment_(self): |
944 |
|
""" |
945 |
|
Returns part of a job script which is prepares |
946 |
|
the execution environment and which is common for all CMS jobs. |
947 |
|
""" |
948 |
< |
txt = ' \n' |
949 |
< |
txt += ' echo " ### SETUP CMS LCG ENVIRONMENT ### "\n' |
950 |
< |
txt += ' echo "JOB_EXIT_STATUS = 0"\n' |
951 |
< |
txt += ' if [ ! $VO_CMS_SW_DIR ] ;then\n' |
952 |
< |
txt += ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n' |
953 |
< |
txt += ' echo "JOB_EXIT_STATUS = 10031" \n' |
954 |
< |
txt += ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n' |
955 |
< |
txt += ' dumpStatus $RUNTIME_AREA/$repo\n' |
956 |
< |
txt += ' exit\n' |
957 |
< |
txt += ' else\n' |
958 |
< |
txt += ' echo "Sourcing environment... "\n' |
959 |
< |
txt += ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n' |
960 |
< |
txt += ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n' |
961 |
< |
txt += ' echo "JOB_EXIT_STATUS = 10020"\n' |
962 |
< |
txt += ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n' |
963 |
< |
txt += ' dumpStatus $RUNTIME_AREA/$repo\n' |
964 |
< |
txt += ' exit\n' |
965 |
< |
txt += ' fi\n' |
966 |
< |
txt += ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n' |
967 |
< |
txt += ' source $VO_CMS_SW_DIR/cmsset_default.sh\n' |
968 |
< |
txt += ' result=$?\n' |
969 |
< |
txt += ' if [ $result -ne 0 ]; then\n' |
970 |
< |
txt += ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n' |
971 |
< |
txt += ' echo "JOB_EXIT_STATUS = 10032"\n' |
972 |
< |
txt += ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n' |
973 |
< |
txt += ' dumpStatus $RUNTIME_AREA/$repo\n' |
974 |
< |
txt += ' exit\n' |
975 |
< |
txt += ' fi\n' |
976 |
< |
txt += ' fi\n' |
977 |
< |
txt += ' \n' |
978 |
< |
txt += ' string=`cat /etc/redhat-release`\n' |
979 |
< |
txt += ' echo $string\n' |
980 |
< |
txt += ' if [[ $string = *alhalla* ]]; then\n' |
981 |
< |
txt += ' echo "SCRAM_ARCH= $SCRAM_ARCH"\n' |
982 |
< |
txt += ' elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n' |
983 |
< |
txt += ' export SCRAM_ARCH=slc3_ia32_gcc323\n' |
984 |
< |
txt += ' echo "SCRAM_ARCH= $SCRAM_ARCH"\n' |
985 |
< |
txt += ' else\n' |
986 |
< |
txt += ' echo "SET_CMS_ENV 1 ==> ERROR OS unknown, LCG environment not initialized"\n' |
987 |
< |
txt += ' echo "JOB_EXIT_STATUS = 10033"\n' |
988 |
< |
txt += ' echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n' |
989 |
< |
txt += ' dumpStatus $RUNTIME_AREA/$repo\n' |
990 |
< |
txt += ' exit\n' |
991 |
< |
txt += ' fi\n' |
992 |
< |
txt += ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n' |
993 |
< |
txt += ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n' |
948 |
> |
txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n' |
949 |
> |
txt += ' echo ">>> setup CMS LCG environment:"\n' |
950 |
> |
txt += ' echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n' |
951 |
> |
txt += ' export SCRAM_ARCH='+self.executable_arch+'\n' |
952 |
> |
txt += ' export BUILD_ARCH='+self.executable_arch+'\n' |
953 |
> |
txt += ' if [ ! $VO_CMS_SW_DIR ] ;then\n' |
954 |
> |
txt += ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n' |
955 |
> |
txt += ' job_exit_code=10031\n' |
956 |
> |
txt += ' func_exit\n' |
957 |
> |
txt += ' else\n' |
958 |
> |
txt += ' echo "Sourcing environment... "\n' |
959 |
> |
txt += ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n' |
960 |
> |
txt += ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n' |
961 |
> |
txt += ' job_exit_code=10020\n' |
962 |
> |
txt += ' func_exit\n' |
963 |
> |
txt += ' fi\n' |
964 |
> |
txt += ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n' |
965 |
> |
txt += ' source $VO_CMS_SW_DIR/cmsset_default.sh\n' |
966 |
> |
txt += ' result=$?\n' |
967 |
> |
txt += ' if [ $result -ne 0 ]; then\n' |
968 |
> |
txt += ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n' |
969 |
> |
txt += ' job_exit_code=10032\n' |
970 |
> |
txt += ' func_exit\n' |
971 |
> |
txt += ' fi\n' |
972 |
> |
txt += ' fi\n' |
973 |
> |
txt += ' \n' |
974 |
> |
txt += ' echo "==> setup cms environment ok"\n' |
975 |
> |
return txt |
976 |
> |
|
977 |
> |
def wsModifyReport(self, nj): |
978 |
> |
""" |
979 |
> |
insert the part of the script that modifies the FrameworkJob Report |
980 |
> |
""" |
981 |
> |
|
982 |
> |
txt = '' |
983 |
> |
publish_data = int(self.cfg_params.get('USER.publish_data',0)) |
984 |
> |
#if (publish_data == 1): |
985 |
> |
if (self.copy_data == 1): |
986 |
> |
txt = '\n#Written by cms_cmssw::wsModifyReport\n' |
987 |
> |
publish_data = int(self.cfg_params.get('USER.publish_data',0)) |
988 |
> |
|
989 |
> |
|
990 |
> |
txt += 'if [ $StageOutExitStatus -eq 0 ]; then\n' |
991 |
> |
txt += ' FOR_LFN=$LFNBaseName\n' |
992 |
> |
txt += 'else\n' |
993 |
> |
txt += ' FOR_LFN=/copy_problems/ \n' |
994 |
> |
txt += 'fi\n' |
995 |
> |
|
996 |
> |
txt += 'echo ">>> Modify Job Report:" \n' |
997 |
> |
txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n' |
998 |
> |
txt += 'echo "SE = $SE"\n' |
999 |
> |
txt += 'echo "SE_PATH = $SE_PATH"\n' |
1000 |
> |
txt += 'echo "FOR_LFN = $FOR_LFN" \n' |
1001 |
> |
txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n' |
1002 |
> |
|
1003 |
> |
|
1004 |
> |
args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH' |
1005 |
> |
if (publish_data == 1): |
1006 |
> |
processedDataset = self.cfg_params['USER.publish_data_name'] |
1007 |
> |
txt += 'ProcessedDataset='+processedDataset+'\n' |
1008 |
> |
txt += 'echo "ProcessedDataset = $ProcessedDataset"\n' |
1009 |
> |
args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH' |
1010 |
> |
|
1011 |
> |
txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'"\n' |
1012 |
> |
txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'\n' |
1013 |
> |
txt += 'modifyReport_result=$?\n' |
1014 |
> |
txt += 'if [ $modifyReport_result -ne 0 ]; then\n' |
1015 |
> |
txt += ' modifyReport_result=70500\n' |
1016 |
> |
txt += ' job_exit_code=$modifyReport_result\n' |
1017 |
> |
txt += ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n' |
1018 |
> |
txt += ' echo "WARNING: Problem with ModifyJobReport"\n' |
1019 |
> |
txt += 'else\n' |
1020 |
> |
txt += ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n' |
1021 |
> |
txt += 'fi\n' |
1022 |
> |
return txt |
1023 |
> |
|
1024 |
> |
def wsParseFJR(self): |
1025 |
> |
""" |
1026 |
> |
Parse the FrameworkJobReport to obtain useful infos |
1027 |
> |
""" |
1028 |
> |
txt = '\n#Written by cms_cmssw::wsParseFJR\n' |
1029 |
> |
txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n' |
1030 |
> |
txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n' |
1031 |
> |
txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n' |
1032 |
> |
txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n' |
1033 |
> |
if self.debug_wrapper==1 : |
1034 |
> |
txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n' |
1035 |
> |
txt += ' executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n' |
1036 |
> |
txt += ' if [ $executable_exit_status -eq 50115 ];then\n' |
1037 |
> |
txt += ' echo ">>> crab_fjr.xml contents: "\n' |
1038 |
> |
txt += ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n' |
1039 |
> |
txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n' |
1040 |
> |
txt += ' elif [ $executable_exit_status -eq -999 ];then\n' |
1041 |
> |
txt += ' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n' |
1042 |
> |
txt += ' else\n' |
1043 |
> |
txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n' |
1044 |
> |
txt += ' fi\n' |
1045 |
> |
txt += ' else\n' |
1046 |
> |
txt += ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n' |
1047 |
> |
txt += ' fi\n' |
1048 |
> |
#### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap |
1049 |
> |
txt += ' if [ $executable_exit_status -eq 0 ];then\n' |
1050 |
> |
txt += ' echo ">>> Executable succeded $executable_exit_status"\n' |
1051 |
> |
## This cannot more work given the changes on the Job argumentsJob |
1052 |
> |
""" |
1053 |
> |
if (self.datasetPath and not (self.dataset_pu or self.useParent==1)) : |
1054 |
> |
# VERIFY PROCESSED DATA |
1055 |
> |
txt += ' echo ">>> Verify list of processed files:"\n' |
1056 |
> |
txt += ' echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n' |
1057 |
> |
txt += ' python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n' |
1058 |
> |
txt += ' cat input-files.txt | sort | uniq > tmp.txt\n' |
1059 |
> |
txt += ' mv tmp.txt input-files.txt\n' |
1060 |
> |
txt += ' echo "cat input-files.txt"\n' |
1061 |
> |
txt += ' echo "----------------------"\n' |
1062 |
> |
txt += ' cat input-files.txt\n' |
1063 |
> |
txt += ' cat processed-files.txt | sort | uniq > tmp.txt\n' |
1064 |
> |
txt += ' mv tmp.txt processed-files.txt\n' |
1065 |
> |
txt += ' echo "----------------------"\n' |
1066 |
> |
txt += ' echo "cat processed-files.txt"\n' |
1067 |
> |
txt += ' echo "----------------------"\n' |
1068 |
> |
txt += ' cat processed-files.txt\n' |
1069 |
> |
txt += ' echo "----------------------"\n' |
1070 |
> |
txt += ' diff -qbB input-files.txt processed-files.txt\n' |
1071 |
> |
txt += ' fileverify_status=$?\n' |
1072 |
> |
txt += ' if [ $fileverify_status -ne 0 ]; then\n' |
1073 |
> |
txt += ' executable_exit_status=30001\n' |
1074 |
> |
txt += ' echo "ERROR ==> not all input files processed"\n' |
1075 |
> |
txt += ' echo " ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n' |
1076 |
> |
txt += ' echo " ==> diff input-files.txt processed-files.txt"\n' |
1077 |
> |
txt += ' fi\n' |
1078 |
> |
""" |
1079 |
> |
txt += ' fi\n' |
1080 |
> |
txt += 'else\n' |
1081 |
> |
txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n' |
1082 |
> |
txt += 'fi\n' |
1083 |
> |
txt += '\n' |
1084 |
> |
txt += 'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n' |
1085 |
> |
txt += ' echo ">>> Executable failed $executable_exit_status"\n' |
1086 |
> |
txt += ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n' |
1087 |
> |
txt += ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n' |
1088 |
> |
txt += ' job_exit_code=$executable_exit_status\n' |
1089 |
> |
txt += ' func_exit\n' |
1090 |
> |
txt += 'fi\n\n' |
1091 |
> |
txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n' |
1092 |
> |
txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n' |
1093 |
> |
txt += 'job_exit_code=$executable_exit_status\n' |
1094 |
> |
|
1095 |
> |
return txt |
1096 |
> |
|
1097 |
> |
def setParam_(self, param, value): |
1098 |
> |
self._params[param] = value |
1099 |
> |
|
1100 |
> |
def getParams(self): |
1101 |
> |
return self._params |
1102 |
> |
|
1103 |
> |
def outList(self,list=False): |
1104 |
> |
""" |
1105 |
> |
check the dimension of the output files |
1106 |
> |
""" |
1107 |
> |
txt = '' |
1108 |
> |
txt += 'echo ">>> list of expected files on output sandbox"\n' |
1109 |
> |
listOutFiles = [] |
1110 |
> |
stdout = 'CMSSW_$NJob.stdout' |
1111 |
> |
stderr = 'CMSSW_$NJob.stderr' |
1112 |
> |
if len(self.output_file) <= 0: |
1113 |
> |
msg ="WARNING: no output files name have been defined!!\n" |
1114 |
> |
msg+="\tno output files will be reported back/staged\n" |
1115 |
> |
common.logger.info(msg) |
1116 |
> |
if (self.return_data == 1): |
1117 |
> |
for file in (self.output_file+self.output_file_sandbox): |
1118 |
> |
listOutFiles.append(numberFile(file, '$NJob')) |
1119 |
> |
listOutFiles.append(stdout) |
1120 |
> |
listOutFiles.append(stderr) |
1121 |
> |
else: |
1122 |
> |
for file in (self.output_file_sandbox): |
1123 |
> |
listOutFiles.append(numberFile(file, '$NJob')) |
1124 |
> |
listOutFiles.append(stdout) |
1125 |
> |
listOutFiles.append(stderr) |
1126 |
> |
txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n' |
1127 |
> |
txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n' |
1128 |
> |
txt += 'export filesToCheck\n' |
1129 |
> |
|
1130 |
> |
if list : return self.output_file |
1131 |
|
return txt |