4 |
|
from crab_util import * |
5 |
|
import common |
6 |
|
|
7 |
< |
import os, sys, tempfile |
7 |
> |
import os, sys, time |
8 |
|
|
9 |
|
class SchedulerEdg(Scheduler): |
10 |
|
def __init__(self): |
36 |
|
try: self.EDG_retry_count = cfg_params['EDG.retry_count'] |
37 |
|
except KeyError: self.EDG_retry_count = '' |
38 |
|
|
39 |
+ |
try: self.VO = cfg_params['EDG.virtual_organization'] |
40 |
+ |
except KeyError: self.VO = 'cms' |
41 |
+ |
|
42 |
+ |
try: |
43 |
+ |
self.copy_input_data = common.analisys_common_info['copy_input_data'] |
44 |
+ |
#print "self.copy_input_data = ", self.copy_input_data |
45 |
+ |
except KeyError: self.copy_input_data = 0 |
46 |
+ |
|
47 |
+ |
try: self.return_data = cfg_params['USER.return_data'] |
48 |
+ |
except KeyError: self.return_data = 1 |
49 |
+ |
|
50 |
+ |
try: |
51 |
+ |
self.copy_data = cfg_params["USER.copy_data"] |
52 |
+ |
if int(self.copy_data) == 1: |
53 |
+ |
try: |
54 |
+ |
self.SE = cfg_params['USER.storage_element'] |
55 |
+ |
self.SE_PATH = cfg_params['USER.storage_path'] |
56 |
+ |
except KeyError: |
57 |
+ |
msg = "Error. The [USER] section does not have 'storage_element'" |
58 |
+ |
msg = msg + " and/or 'storage_path' entries, necessary to copy the output" |
59 |
+ |
common.logger.message(msg) |
60 |
+ |
raise CrabException(msg) |
61 |
+ |
except KeyError: self.copy_data = 0 |
62 |
+ |
|
63 |
+ |
if ( int(self.return_data) == 0 and int(self.copy_data) == 0 ): |
64 |
+ |
msg = 'Warning: return_data = 0 and copy_data = 0 ==> your exe output will be lost\n' |
65 |
+ |
msg = msg + 'Please modify return_data and copy_data value in your crab.cfg file\n' |
66 |
+ |
raise CrabException(msg) |
67 |
+ |
|
68 |
|
try: |
69 |
< |
self.VO = cfg_params['EDG.virtual_organization'] |
69 |
> |
self.lfc_host = cfg_params['EDG.lfc_host'] |
70 |
|
except KeyError: |
71 |
< |
self.VO = 'cms' |
71 |
> |
msg = "Error. The [EDG] section does not have 'lfc_host' value" |
72 |
> |
msg = msg + " it's necessary to know the LFC host name" |
73 |
> |
common.logger.message(msg) |
74 |
> |
raise CrabException(msg) |
75 |
> |
try: |
76 |
> |
self.lcg_catalog_type = cfg_params['EDG.lcg_catalog_type'] |
77 |
> |
except KeyError: |
78 |
> |
msg = "Error. The [EDG] section does not have 'lcg_catalog_type' value" |
79 |
> |
msg = msg + " it's necessary to know the catalog type" |
80 |
> |
common.logger.message(msg) |
81 |
> |
raise CrabException(msg) |
82 |
> |
try: |
83 |
> |
self.lfc_home = cfg_params['EDG.lfc_home'] |
84 |
> |
except KeyError: |
85 |
> |
msg = "Error. The [EDG] section does not have 'lfc_home' value" |
86 |
> |
msg = msg + " it's necessary to know the home catalog dir" |
87 |
> |
common.logger.message(msg) |
88 |
> |
raise CrabException(msg) |
89 |
> |
|
90 |
> |
try: |
91 |
> |
self.register_data = cfg_params["USER.register_data"] |
92 |
> |
if int(self.register_data) == 1: |
93 |
> |
try: |
94 |
> |
self.LFN = cfg_params['USER.lfn_dir'] |
95 |
> |
except KeyError: |
96 |
> |
msg = "Error. The [USER] section does not have 'lfn_dir' value" |
97 |
> |
msg = msg + " it's necessary for LCF registration" |
98 |
> |
common.logger.message(msg) |
99 |
> |
raise CrabException(msg) |
100 |
> |
except KeyError: self.register_data = 0 |
101 |
> |
|
102 |
> |
if ( int(self.copy_data) == 0 and int(self.register_data) == 1 ): |
103 |
> |
msg = 'Warning: register_data = 1 must be used with copy_data = 1\n' |
104 |
> |
msg = msg + 'Please modify copy_data value in your crab.cfg file\n' |
105 |
> |
common.logger.message(msg) |
106 |
> |
raise CrabException(msg) |
107 |
> |
|
108 |
> |
try: self.EDG_requirements = cfg_params['EDG.requirements'] |
109 |
> |
except KeyError: self.EDG_requirements = '' |
110 |
> |
|
111 |
> |
try: self.EDG_retry_count = cfg_params['EDG.retry_count'] |
112 |
> |
except KeyError: self.EDG_retry_count = '' |
113 |
> |
|
114 |
> |
try: self.EDG_clock_time = cfg_params['EDG.max_wall_clock_time'] |
115 |
> |
except KeyError: self.EDG_clock_time= '' |
116 |
> |
|
117 |
> |
try: self.EDG_cpu_time = cfg_params['EDG.max_cpu_time'] |
118 |
> |
except KeyError: self.EDG_cpu_time = '' |
119 |
|
|
120 |
|
# Add EDG_WL_LOCATION to the python path |
121 |
|
|
130 |
|
libPath=os.path.join(path, "lib", "python") |
131 |
|
sys.path.append(libPath) |
132 |
|
|
133 |
< |
self.checkProxy_() |
133 |
> |
self.proxyValid=0 |
134 |
|
return |
135 |
|
|
136 |
|
|
153 |
|
""" |
154 |
|
Returns part of a job script which does scheduler-specific work. |
155 |
|
""" |
156 |
< |
txt = '\n' |
156 |
> |
|
157 |
> |
txt = '' |
158 |
> |
if int(self.copy_data) == 1: |
159 |
> |
if self.SE: |
160 |
> |
txt += 'export SE='+self.SE+'\n' |
161 |
> |
txt += 'echo "SE = $SE"\n' |
162 |
> |
if self.SE_PATH: |
163 |
> |
if ( self.SE_PATH[-1] != '/' ) : self.SE_PATH = self.SE_PATH + '/' |
164 |
> |
txt += 'export SE_PATH='+self.SE_PATH+'\n' |
165 |
> |
txt += 'echo "SE_PATH = $SE_PATH"\n' |
166 |
> |
|
167 |
> |
txt += 'export VO='+self.VO+'\n' |
168 |
> |
### FEDE: add some line for LFC catalog setting |
169 |
> |
txt += 'if [[ $LCG_CATALOG_TYPE != \''+self.lcg_catalog_type+'\' ]]; then\n' |
170 |
> |
txt += ' export LCG_CATALOG_TYPE='+self.lcg_catalog_type+'\n' |
171 |
> |
txt += 'fi\n' |
172 |
> |
txt += 'if [[ $LFC_HOST != \''+self.lfc_host+'\' ]]; then\n' |
173 |
> |
txt += 'export LFC_HOST='+self.lfc_host+'\n' |
174 |
> |
txt += 'fi\n' |
175 |
> |
txt += 'if [[ $LFC_HOME != \''+self.lfc_home+'\' ]]; then\n' |
176 |
> |
txt += 'export LFC_HOME='+self.lfc_home+'\n' |
177 |
> |
txt += 'fi\n' |
178 |
> |
##### |
179 |
> |
if int(self.register_data) == 1: |
180 |
> |
txt += 'export LFN='+self.LFN+'\n' |
181 |
> |
txt += 'lfc-ls $LFN\n' |
182 |
> |
txt += 'result=$?\n' |
183 |
> |
txt += 'echo $result\n' |
184 |
> |
### creation of LFN dir in LFC catalog, under /grid/cms dir |
185 |
> |
txt += 'if [ $result != 0 ]; then\n' |
186 |
> |
txt += ' lfc-mkdir $LFN\n' |
187 |
> |
txt += ' result=$?\n' |
188 |
> |
txt += ' echo $result\n' |
189 |
> |
txt += 'fi\n' |
190 |
> |
txt += '\n' |
191 |
|
txt += 'CloseCEs=`edg-brokerinfo getCE`\n' |
192 |
|
txt += 'echo "CloseCEs = $CloseCEs"\n' |
193 |
|
txt += 'CE=`echo $CloseCEs | sed -e "s/:.*//"`\n' |
194 |
|
txt += 'echo "CE = $CE"\n' |
195 |
|
return txt |
196 |
|
|
197 |
< |
def loggingInfo(self, nj): |
197 |
> |
def wsCopyInput(self):
    """
    Return the job-script fragment that copies input data from SE to WN.

    The 'copy_input_data' flag is re-read from
    common.analisys_common_info (defaulting to 0 when the key is
    missing) and stored on the instance.  When the flag is not 1 an
    empty string is returned.

    The generated shell code uses $VO, $cur_file_list, $input_lfn,
    $cur_pu_list and $pu_lfn; presumably these are defined earlier in
    the same job script -- TODO confirm against the script writer.
    """
    txt = ''
    try:
        self.copy_input_data = common.analisys_common_info['copy_input_data']
    except KeyError:
        self.copy_input_data = 0

    if int(self.copy_input_data) == 1:
        txt += '#\n'
        txt += '# Copy Input Data from SE to this WN\n'
        txt += '#\n'
        # Loop so that more than one input file per job can be copied.
        txt += 'for input_file in $cur_file_list \n'
        txt += 'do \n'
        txt += ' lcg-cp --vo $VO lfn:$input_lfn/$input_file file:`pwd`/$input_file 2>&1\n'
        txt += ' copy_input_exit_status=$?\n'
        txt += ' echo "COPY_INPUT_EXIT_STATUS = $copy_input_exit_status"\n'
        txt += ' if [ $copy_input_exit_status -ne 0 ]; then \n'
        txt += ' echo "Problems with copying to WN" \n'
        txt += ' else \n'
        txt += ' echo "input copied into WN" \n'
        txt += ' fi \n'
        txt += 'done \n'
        # Copy a set of PU ntuples (same for each job, accessed randomly).
        txt += 'for file in $cur_pu_list \n'
        txt += 'do \n'
        txt += ' lcg-cp --vo $VO lfn:$pu_lfn/$file file:`pwd`/$file 2>&1\n'
        # The status must be stored in the variable that is echoed and
        # tested below; storing it in copy_input_exit_status left
        # copy_input_pu_exit_status unset and broke the '[ ... -ne 0 ]' test.
        txt += ' copy_input_pu_exit_status=$?\n'
        txt += ' echo "COPY_INPUT_PU_EXIT_STATUS = $copy_input_pu_exit_status"\n'
        txt += ' if [ $copy_input_pu_exit_status -ne 0 ]; then \n'
        txt += ' echo "Problems with copying pu to WN" \n'
        txt += ' else \n'
        txt += ' echo "input pu files copied into WN" \n'
        txt += ' fi \n'
        txt += 'done \n'
        txt += '\n'
        txt += '### Check SCRATCH space available on WN : \n'
        txt += 'df -h \n'
    return txt
238 |
> |
|
239 |
> |
def wsCopyOutput(self):
    """
    Write a CopyResults part of a job script, e.g.
    to copy produced output into a storage element.

    Returns an empty string unless int(self.copy_data) == 1.

    The generated shell code copies every file of $file_list to
    gsiftp://${SE}${SE_PATH} with lcg-cp and appends the stage-out
    result to $RUNTIME_AREA/$repo.  The virtual organisation is taken
    from $VO (exported by the environment-setup fragment of this
    scheduler) instead of being hard-coded to 'cms', so that a
    configured EDG.virtual_organization is honoured here as well.
    """
    txt = ''
    if int(self.copy_data) == 1:
        txt += '#\n'
        txt += '# Copy output to SE = $SE\n'
        txt += '#\n'
        txt += 'if [ $exe_result -eq 0 ]; then\n'
        txt += ' for out_file in $file_list ; do\n'
        txt += ' echo "Trying to copy output file to $SE "\n'
        txt += ' echo "lcg-cp --vo $VO -t 1200 file://`pwd`/$out_file gsiftp://${SE}${SE_PATH}$out_file"\n'
        # The inner backticks are backslash-escaped for the shell because
        # they are nested inside the outer command substitution.
        txt += ' exitstring=`lcg-cp --vo $VO -t 1200 file://\\`pwd\\`/$out_file gsiftp://${SE}${SE_PATH}$out_file 2>&1`\n'
        txt += ' copy_exit_status=$?\n'
        txt += ' echo "COPY_EXIT_STATUS = $copy_exit_status"\n'
        txt += ' echo "STAGE_OUT = $copy_exit_status"\n'
        txt += ' if [ $copy_exit_status -ne 0 ]; then\n'
        txt += ' echo "Problems with SE = $SE"\n'
        txt += ' echo "StageOutExitStatus = 198" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' echo "StageOutExitStatusReason = $exitstring" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' else\n'
        txt += ' echo "StageOutSE = $SE" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' echo "StageOutCatalog = " | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' echo "output copied into $SE/$SE_PATH directory"\n'
        txt += ' echo "StageOutExitStatus = 0" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' fi\n'
        txt += ' done\n'
        txt += 'fi\n'
    return txt
272 |
> |
|
273 |
> |
def wsRegisterOutput(self):
    """
    Return the job-script fragment that registers the output in LFC.

    An empty string is returned unless int(self.register_data) == 1.
    The generated shell code has three branches:
      * executable and copy both succeeded: register each file of
        $file_list with lcg-rf (sfn:// first, srm:// as fallback);
      * executable succeeded but the copy failed: copy-and-register
        each file to the first close SE with lcg-cr;
      * otherwise: report a problem with the executable.
    """
    if int(self.register_data) != 1:
        return ''

    script_lines = (
        '#\n',
        '# Register output to LFC\n',
        '#\n',
        # Branch 1: output already copied, only the registration is needed.
        'if [[ $exe_result -eq 0 && $copy_exit_status -eq 0 ]]; then\n',
        ' for out_file in $file_list ; do\n',
        ' echo "Trying to register the output file into LFC"\n',
        ' echo "lcg-rf -l $LFN/$out_file --vo $VO sfn://$SE$SE_PATH/$out_file"\n',
        ' lcg-rf -l $LFN/$out_file --vo $VO sfn://$SE$SE_PATH/$out_file 2>&1 \n',
        ' register_exit_status=$?\n',
        ' echo "REGISTER_EXIT_STATUS = $register_exit_status"\n',
        ' echo "STAGE_OUT = $register_exit_status"\n',
        ' if [ $register_exit_status -ne 0 ]; then \n',
        ' echo "Problems with the registration to LFC" \n',
        ' echo "Try with srm protocol" \n',
        ' echo "lcg-rf -l $LFN/$out_file --vo $VO srm://$SE$SE_PATH/$out_file"\n',
        ' lcg-rf -l $LFN/$out_file --vo $VO srm://$SE$SE_PATH/$out_file 2>&1 \n',
        ' register_exit_status=$?\n',
        ' echo "REGISTER_EXIT_STATUS = $register_exit_status"\n',
        ' echo "STAGE_OUT = $register_exit_status"\n',
        ' if [ $register_exit_status -ne 0 ]; then \n',
        ' echo "Problems with the registration into LFC" \n',
        ' fi \n',
        ' else \n',
        ' echo "output registered to LFC"\n',
        ' fi \n',
        ' echo "StageOutExitStatus = $register_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' done\n',
        # Branch 2: the copy failed, fall back to the close SE.
        'elif [[ $exe_result -eq 0 && $copy_exit_status -ne 0 ]]; then \n',
        ' echo "Trying to copy output file to CloseSE"\n',
        ' CLOSE_SE=`edg-brokerinfo getCloseSEs | head -1`\n',
        ' for out_file in $file_list ; do\n',
        ' echo "lcg-cr -v -l lfn:${LFN}/$out_file -d $CLOSE_SE -P $LFN/$out_file --vo $VO file://`pwd`/$out_file" \n',
        ' lcg-cr -v -l lfn:${LFN}/$out_file -d $CLOSE_SE -P $LFN/$out_file --vo $VO file://`pwd`/$out_file 2>&1 \n',
        ' register_exit_status=$?\n',
        ' echo "REGISTER_EXIT_STATUS = $register_exit_status"\n',
        ' echo "STAGE_OUT = $register_exit_status"\n',
        ' if [ $register_exit_status -ne 0 ]; then \n',
        ' echo "Problems with CloseSE" \n',
        ' else \n',
        ' echo "The program was successfully executed"\n',
        ' echo "SE = $CLOSE_SE"\n',
        ' echo "LFN for the file is LFN=${LFN}/$out_file"\n',
        ' fi \n',
        ' echo "StageOutExitStatus = $register_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' done\n',
        # Branch 3: the executable itself failed.
        'else\n',
        ' echo "Problem with the executable"\n',
        'fi \n',
    )
    return ''.join(script_lines)
329 |
> |
|
330 |
> |
def loggingInfo(self, id):
    """
    Retrieve the logging info from logging and bookkeeping and return it.

    id is the job identifier appended to the
    'edg-job-get-logging-info' command line; the value returned by
    runCommand for that command is passed back unchanged.
    """
    # Make sure a usable proxy exists before contacting the service.
    self.checkProxy()
    logging_cmd = 'edg-job-get-logging-info -v 2 ' + id
    return runCommand(logging_cmd)
339 |
|
|
340 |
|
def listMatch(self, nj):
    """
    Check the compatibility of available resources.

    Runs 'edg-job-list-match' on the JDL file of job number nj and
    returns whatever parseListMatch_ computes from the command output.
    Raises CrabException when the command yields no output.
    """
    self.checkProxy()
    jdl_file = common.job_list[nj].jdlFilename()
    match_cmd = 'edg-job-list-match ' + self.configOpt_() + jdl_file
    match_out = runCommand(match_cmd,0,10)
    if match_out:
        return self.parseListMatch_(match_out, jdl_file)
    raise CrabException("ERROR: "+match_cmd+" failed!")
352 |
|
|
353 |
|
def parseListMatch_(self, out, jdl):
    """
    Parse the output of edg-job-list-match and count the matching CEs.

    out -- the command output as a single string
    jdl -- JDL file name (not used by the parsing itself)

    Returns 0 as soon as a 'No Computing Element matching' line is
    seen; otherwise returns the number of distinct CE names found on
    the lines that follow the 'CEId' header.  A CE name is the text
    before the first ':' of any such line containing a colon.
    """
    reVO = re.compile( r'Selected Virtual Organisation name.*' )
    reCE = re.compile( r'(.*:.*)' )
    reCEId = re.compile( r'CEId.*' )
    reNO = re.compile( r'No Computing Element matching' )
    reRB = re.compile( r'Connecting to host' )

    # '.*' never crosses a newline, so this yields one entry per line
    # (interleaved with empty strings).
    lines = re.findall( r'.*', out )

    CEs = []
    for idx, raw_line in enumerate(lines):
        # The stripped text must actually be used: the match() calls below
        # are anchored at the start of the string and would otherwise miss
        # indented lines.
        line = raw_line.strip()

        if reNO.match( line ):
            common.logger.debug(5,line)
            return 0

        vo_match = reVO.match( line )
        if vo_match:
            common.logger.debug(5,"VO "+vo_match.group())

        rb_match = reRB.match( line )
        if rb_match:
            common.logger.debug(5,"RB "+rb_match.group())

        if reCEId.search( line ):
            # Everything from the header up to (excluding) the last entry
            # is scanned for 'name:...' lines.
            for lineCE in lines[idx:-1]:
                ce_match = reCE.match( lineCE )
                if ce_match:
                    CE = ce_match.group(1).strip()
                    CEs.append(CE.split(':')[0])

    common.logger.debug(5,"All CE :"+str(CEs))

    # Remove duplicates while keeping the order of first appearance.
    sites = []
    for ce in CEs:
        if ce not in sites:
            sites.append(ce)

    common.logger.debug(5,"All Sites :"+str(sites))
    return len(sites)
409 |
|
|
410 |
|
def noMatchFound_(self, jdl): |
411 |
|
reReq = re.compile( r'Requirements' ) |
431 |
|
Submit one EDG job. |
432 |
|
""" |
433 |
|
|
434 |
+ |
self.checkProxy() |
435 |
|
jid = None |
436 |
|
jdl = common.job_list[nj].jdlFilename() |
437 |
< |
id_tmp = tempfile.mktemp() |
438 |
< |
edg_ui_cfg_opt = ' ' |
187 |
< |
if self.edg_config: |
188 |
< |
edg_ui_cfg_opt = ' -c ' + self.edg_config + ' ' |
189 |
< |
if self.edg_config_vo: |
190 |
< |
edg_ui_cfg_opt += ' --config-vo ' + self.edg_config_vo + ' ' |
191 |
< |
cmd = 'edg-job-submit -o ' + id_tmp + edg_ui_cfg_opt + jdl |
437 |
> |
|
438 |
> |
cmd = 'edg-job-submit ' + self.configOpt_() + jdl |
439 |
|
cmd_out = runCommand(cmd) |
440 |
|
if cmd_out != None: |
441 |
< |
idfile = open(id_tmp) |
442 |
< |
jid_line = idfile.readline() |
196 |
< |
while jid_line[0] == '#': |
197 |
< |
jid_line = idfile.readline() |
198 |
< |
pass |
199 |
< |
jid = string.strip(jid_line) |
200 |
< |
os.unlink(id_tmp) |
441 |
> |
reSid = re.compile( r'https.+' ) |
442 |
> |
jid = reSid.search(cmd_out).group() |
443 |
|
pass |
444 |
|
return jid |
445 |
|
|
446 |
+ |
def resubmit(self, nj_list):
    """
    Prepare jobs to be submitted again.

    Nothing is done for this scheduler; nj_list is ignored and None
    is returned.
    """
    return None
451 |
+ |
|
452 |
|
def getExitStatus(self, id):
    """ Return the 'exit_code' status attribute of the job with id """
    attribute = 'exit_code'
    return self.getStatusAttribute_(id, attribute)
454 |
|
|
462 |
|
def getStatusAttribute_(self, id, attr): |
463 |
|
""" Query a status of the job with id """ |
464 |
|
|
465 |
+ |
self.checkProxy() |
466 |
|
hstates = {} |
467 |
|
Status = importName('edg_wl_userinterface_common_LbWrapper', 'Status') |
468 |
|
# Bypass edg-job-status interfacing directly to C++ API |
474 |
|
jobStat.getStatus(id, level) |
475 |
|
err, apiMsg = jobStat.get_error() |
476 |
|
if err: |
477 |
< |
print 'Error caught', apiMsg |
229 |
< |
common.log.message(apiMsg) |
477 |
> |
common.logger.debug(5,'Error caught' + apiMsg) |
478 |
|
return None |
479 |
|
else: |
480 |
|
for i in range(len(self.states)): |
495 |
|
Returns the name of directory with results. |
496 |
|
""" |
497 |
|
|
498 |
+ |
self.checkProxy() |
499 |
|
cmd = 'edg-job-get-output --dir ' + common.work_space.resDir() + ' ' + id |
500 |
|
cmd_out = runCommand(cmd) |
501 |
|
|
507 |
|
|
508 |
|
def cancel(self, id):
    """
    Cancel the EDG job with id.

    Returns the value produced by runCommand for the non-interactive
    'edg-job-cancel' command.
    """
    self.checkProxy()
    cancel_cmd = 'edg-job-cancel --noint ' + id
    return runCommand(cancel_cmd)
514 |
|
|
265 |
– |
def checkProxy_(self): |
266 |
– |
""" |
267 |
– |
Function to check the Globus proxy. |
268 |
– |
""" |
269 |
– |
cmd = 'grid-proxy-info -timeleft' |
270 |
– |
cmd_out = runCommand(cmd) |
271 |
– |
ok = 1 |
272 |
– |
timeleft = -999 |
273 |
– |
try: timeleft = int(cmd_out) |
274 |
– |
except ValueError: ok=0 |
275 |
– |
except TypeError: ok=0 |
276 |
– |
if timeleft < 1: ok=0 |
277 |
– |
|
278 |
– |
if ok==0: |
279 |
– |
print "No valid proxy found !\n" |
280 |
– |
print "Creating a user proxy with default length of 100h\n" |
281 |
– |
msg = "Unable to create a valid proxy!\n" |
282 |
– |
if os.system("grid-proxy-init -valid 100:00"): |
283 |
– |
raise CrabException(msg) |
284 |
– |
return |
285 |
– |
|
515 |
|
def createSchScript(self, nj): |
516 |
|
""" |
517 |
|
Create a JDL-file for EDG. |
526 |
|
title = '# This JDL was generated by '+\ |
527 |
|
common.prog_name+' (version '+common.prog_version_str+')\n' |
528 |
|
jt_string = '' |
529 |
+ |
|
530 |
+ |
|
531 |
|
|
532 |
|
SPL = inp_storage_subdir |
533 |
|
if ( SPL and SPL[-1] != '/' ) : SPL = SPL + '/' |
540 |
|
jdl.write('Executable = "' + os.path.basename(script) +'";\n') |
541 |
|
jdl.write(jt_string) |
542 |
|
|
543 |
+ |
### only one .sh JDL has arguments: |
544 |
|
firstEvent = common.jobDB.firstEvent(nj) |
545 |
|
maxEvents = common.jobDB.maxEvents(nj) |
546 |
|
jdl.write('Arguments = "' + str(nj+1)+' '+str(firstEvent)+' '+str(maxEvents)+'";\n') |
557 |
|
#if common.use_jam: |
558 |
|
# inp_box = inp_box+' "'+common.bin_dir+'/'+common.run_jam+'",' |
559 |
|
|
560 |
< |
for addFile in jbt.additional_inbox_files: |
561 |
< |
addFile = os.path.abspath(addFile) |
562 |
< |
inp_box = inp_box+' "'+addFile+'",' |
563 |
< |
pass |
560 |
> |
# Marco (VERY TEMPORARY ML STUFF) |
561 |
> |
inp_box = inp_box+' "' + os.path.abspath(os.environ['CRABDIR']+'/python/'+'report.py') + '", "' +\ |
562 |
> |
os.path.abspath(os.environ['CRABDIR']+'/python/'+'Logger.py') + '", "'+\ |
563 |
> |
os.path.abspath(os.environ['CRABDIR']+'/python/'+'ProcInfo.py') + '", "'+\ |
564 |
> |
os.path.abspath(os.environ['CRABDIR']+'/python/'+'apmon.py') + '"' |
565 |
> |
# End Marco |
566 |
> |
|
567 |
> |
if (not jbt.additional_inbox_files == []): |
568 |
> |
inp_box = inp_box + ', ' |
569 |
> |
for addFile in jbt.additional_inbox_files: |
570 |
> |
addFile = os.path.abspath(addFile) |
571 |
> |
inp_box = inp_box+' "'+addFile+'",' |
572 |
> |
pass |
573 |
|
|
574 |
|
if inp_box[-1] == ',' : inp_box = inp_box[:-1] |
575 |
|
inp_box = inp_box + ' };\n' |
577 |
|
|
578 |
|
jdl.write('StdOutput = "' + job.stdout() + '";\n') |
579 |
|
jdl.write('StdError = "' + job.stderr() + '";\n') |
580 |
< |
|
581 |
< |
#if common.flag_return_data : |
582 |
< |
# for fl in job.outputDataFiles(): |
583 |
< |
# out_box = out_box + ' "' + fl + '",' |
584 |
< |
# pass |
585 |
< |
# pass |
586 |
< |
|
587 |
< |
out_box = 'OutputSandbox = { ' |
588 |
< |
if out_sandbox != None: |
589 |
< |
for fl in out_sandbox: |
590 |
< |
out_box = out_box + ' "' + fl + '",' |
580 |
> |
|
581 |
> |
|
582 |
> |
if job.stdout() == job.stderr(): |
583 |
> |
out_box = 'OutputSandbox = { "' + \ |
584 |
> |
job.stdout() + '", ".BrokerInfo",' |
585 |
> |
else: |
586 |
> |
out_box = 'OutputSandbox = { "' + \ |
587 |
> |
job.stdout() + '", "' + \ |
588 |
> |
job.stderr() + '", ".BrokerInfo",' |
589 |
> |
|
590 |
> |
if int(self.return_data) == 1: |
591 |
> |
if out_sandbox != None: |
592 |
> |
for fl in out_sandbox: |
593 |
> |
out_box = out_box + ' "' + fl + '",' |
594 |
> |
pass |
595 |
|
pass |
596 |
|
pass |
597 |
< |
|
597 |
> |
|
598 |
|
if out_box[-1] == ',' : out_box = out_box[:-1] |
599 |
|
out_box = out_box + ' };' |
600 |
|
jdl.write(out_box+'\n') |
601 |
|
|
357 |
– |
if common.analisys_common_info['sites']: |
358 |
– |
if common.analisys_common_info['sw_version']: |
359 |
– |
|
360 |
– |
req='Requirements = ' |
361 |
– |
### First ORCA version |
362 |
– |
req=req + 'Member("VO-cms-' + \ |
363 |
– |
common.analisys_common_info['sw_version'] + \ |
364 |
– |
'", other.GlueHostApplicationSoftwareRunTimeEnvironment)' |
365 |
– |
## then sites |
366 |
– |
if len(common.analisys_common_info['sites'])>0: |
367 |
– |
req = req + ' && (' |
368 |
– |
for i in range(len(common.analisys_common_info['sites'])): |
369 |
– |
req = req + 'other.GlueCEInfoHostName == "' \ |
370 |
– |
+ common.analisys_common_info['sites'][i] + '"' |
371 |
– |
if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ): |
372 |
– |
req = req + ' || ' |
373 |
– |
req = req + ')' |
374 |
– |
## then user requirement |
375 |
– |
if self.EDG_requirements: |
376 |
– |
req = req + ' && ' + self.EDG_requirements |
377 |
– |
req = req + ';\n' |
378 |
– |
jdl.write(req) |
602 |
|
|
603 |
+ |
req='Requirements = ' |
604 |
+ |
req = req + jbt.getRequirements() |
605 |
+ |
# ### if at least a CE exists ... |
606 |
+ |
# if common.analisys_common_info['sites']: |
607 |
+ |
# if common.analisys_common_info['sw_version']: |
608 |
+ |
# req='Requirements = ' |
609 |
+ |
# req=req + 'Member("VO-cms-' + \ |
610 |
+ |
# common.analisys_common_info['sw_version'] + \ |
611 |
+ |
# '", other.GlueHostApplicationSoftwareRunTimeEnvironment)' |
612 |
+ |
# if len(common.analisys_common_info['sites'])>0: |
613 |
+ |
# req = req + ' && (' |
614 |
+ |
# for i in range(len(common.analisys_common_info['sites'])): |
615 |
+ |
# req = req + 'other.GlueCEInfoHostName == "' \ |
616 |
+ |
# + common.analisys_common_info['sites'][i] + '"' |
617 |
+ |
# if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ): |
618 |
+ |
# req = req + ' || ' |
619 |
+ |
# req = req + ')' |
620 |
+ |
#### and USER REQUIREMENT |
621 |
+ |
if self.EDG_requirements: |
622 |
+ |
if (req == 'Requirement = '): |
623 |
+ |
req = req + self.EDG_requirements |
624 |
+ |
else: |
625 |
+ |
req = req + ' && ' + self.EDG_requirements |
626 |
+ |
if self.EDG_clock_time: |
627 |
+ |
if (req == 'Requirement = '): |
628 |
+ |
req = req + 'other.GlueCEPolicyMaxWallClockTime>='+self.EDG_clock_time |
629 |
+ |
else: |
630 |
+ |
req = req + ' && other.GlueCEPolicyMaxWallClockTime>='+self.EDG_clock_time |
631 |
+ |
|
632 |
+ |
if self.EDG_cpu_time: |
633 |
+ |
if (req == 'Requirement = '): |
634 |
+ |
req = req + ' other.GlueCEPolicyMaxCPUTime>='+self.EDG_cpu_time |
635 |
+ |
else: |
636 |
+ |
req = req + ' && other.GlueCEPolicyMaxCPUTime>='+self.EDG_cpu_time |
637 |
+ |
if (req != 'Requirement = '): |
638 |
+ |
req = req + ';\n' |
639 |
+ |
jdl.write(req) |
640 |
+ |
|
641 |
|
jdl.write('VirtualOrganisation = "' + self.VO + '";\n') |
642 |
|
|
643 |
|
if ( self.EDG_retry_count ): |
646 |
|
|
647 |
|
jdl.close() |
648 |
|
return |
649 |
+ |
|
650 |
+ |
def checkProxy(self):
    """
    Function to check the Globus proxy.

    Does nothing when self.proxyValid is already set.  Otherwise
    'grid-proxy-info' is asked whether the proxy is valid for at least
    10 more hours; if it is not, a new 100h proxy is created with
    'grid-proxy-init'.  On success self.proxyValid is set to 1.

    Raises CrabException when the new proxy cannot be created.
    """
    if self.proxyValid:
        return

    minTimeLeft = 10 # in hours
    cmd = 'grid-proxy-info -e -v ' + str(minTimeLeft) + ':00'
    # os.system is used on purpose: the command signals "no valid proxy"
    # through a non-zero exit status.
    if os.system(cmd) != 0:
        common.logger.message( "No valid proxy found or timeleft too short!\n Creating a user proxy with default length of 100h\n")
        # No blanket 'except' around this call: it only re-raised the same
        # CrabException and also swallowed KeyboardInterrupt/SystemExit.
        if os.system('grid-proxy-init -valid 100:00') != 0:
            raise CrabException("Unable to create a valid proxy!\n")

    self.proxyValid = 1
    return
675 |
+ |
|
676 |
+ |
def configOpt_(self):
    """
    Build the configuration options for the edg-job-* command line.

    Returns ' ' when neither self.edg_config nor self.edg_config_vo is
    set; a set edg_config replaces that blank with ' -c <file> ' and a
    set edg_config_vo appends ' --config-vo <file> '.
    """
    pieces = [' ']
    if self.edg_config:
        pieces = [' -c ' + self.edg_config + ' ']
    if self.edg_config_vo:
        pieces.append(' --config-vo ' + self.edg_config_vo + ' ')
    return ''.join(pieces)