2 |
|
from crab_logger import Logger |
3 |
|
from crab_exceptions import * |
4 |
|
from crab_util import * |
5 |
+ |
from EdgConfig import * |
6 |
|
import common |
7 |
|
|
8 |
< |
import os, sys, tempfile |
8 |
> |
import os, sys, time |
9 |
|
|
10 |
|
class SchedulerEdg(Scheduler): |
11 |
|
def __init__(self):
    """
    Initialise the base Scheduler under the name "EDG" and record the
    ordered list of job status field names.
    """
    Scheduler.__init__(self, "EDG")
    # The position of a name in this list is used by getStatusAttribute_
    # as the index into the record returned by the status API.
    self.states = [
        "Acl",
        "cancelReason",
        "cancelling",
        "ce_node",
        "children",
        "children_hist",
        "children_num",
        "children_states",
        "condorId",
        "condor_jdl",
        "cpuTime",
        "destination",
        "done_code",
        "exit_code",
        "expectFrom",
        "expectUpdate",
        "globusId",
        "jdl",
        "jobId",
        "jobtype",
        "lastUpdateTime",
        "localId",
        "location",
        "matched_jdl",
        "network_server",
        "owner",
        "parent_job",
        "reason",
        "resubmitted",
        "rsl",
        "seed",
        "stateEnterTime",
        "stateEnterTimes",
        "subjob_failed",
        "user tags",
        "status",
        "status_code",
        "hierarchy",
    ]
22 |
|
|
23 |
|
def configure(self, cfg_params): |
24 |
< |
|
25 |
< |
try: self.edg_ui_cfg = cfg_params["EDG.rb_config"] |
26 |
< |
except KeyError: self.edg_ui_cfg = '' |
27 |
< |
|
28 |
< |
try: self.edg_config = cfg_params["EDG.config"] |
29 |
< |
except KeyError: self.edg_config = '' |
30 |
< |
|
31 |
< |
try: self.edg_config_vo = cfg_params["EDG.config_vo"] |
32 |
< |
except KeyError: self.edg_config_vo = '' |
24 |
> |
|
25 |
> |
try: |
26 |
> |
RB = cfg_params["EDG.rb"] |
27 |
> |
edgConfig = EdgConfig(RB) |
28 |
> |
self.edg_config = edgConfig.config() |
29 |
> |
self.edg_config_vo = edgConfig.configVO() |
30 |
> |
except KeyError: |
31 |
> |
self.edg_config = '' |
32 |
> |
self.edg_config_vo = '' |
33 |
|
|
34 |
|
try: self.LCG_version = cfg_params["EDG.lcg_version"] |
35 |
|
except KeyError: self.LCG_version = '2' |
37 |
|
try: self.EDG_requirements = cfg_params['EDG.requirements'] |
38 |
|
except KeyError: self.EDG_requirements = '' |
39 |
|
|
40 |
< |
try: self.EDG_retry_count = cfg_params['EDG.retry_count'] |
40 |
> |
try: |
41 |
> |
self.EDG_retry_count = cfg_params['EDG.retry_count'] |
42 |
> |
#print "self.EDG_retry_count = ", self.EDG_retry_count |
43 |
|
except KeyError: self.EDG_retry_count = '' |
44 |
|
|
45 |
+ |
try: |
46 |
+ |
self.EDG_ce_black_list = cfg_params['EDG.ce_black_list'] |
47 |
+ |
#print "self.EDG_ce_black_list = ", self.EDG_ce_black_list |
48 |
+ |
except KeyError: |
49 |
+ |
self.EDG_ce_black_list = '' |
50 |
+ |
|
51 |
+ |
try: |
52 |
+ |
self.EDG_ce_white_list = cfg_params['EDG.ce_white_list'] |
53 |
+ |
#print "self.EDG_ce_white_list = ", self.EDG_ce_white_list |
54 |
+ |
except KeyError: self.EDG_ce_white_list = '' |
55 |
+ |
|
56 |
+ |
try: self.VO = cfg_params['EDG.virtual_organization'] |
57 |
+ |
except KeyError: self.VO = 'cms' |
58 |
+ |
|
59 |
|
try: |
60 |
< |
self.VO = cfg_params['EDG.virtual_organization'] |
60 |
> |
self.copy_input_data = common.analisys_common_info['copy_input_data'] |
61 |
> |
#print "self.copy_input_data = ", self.copy_input_data |
62 |
> |
except KeyError: self.copy_input_data = 0 |
63 |
> |
|
64 |
> |
try: self.return_data = cfg_params['USER.return_data'] |
65 |
> |
except KeyError: self.return_data = 1 |
66 |
> |
|
67 |
> |
try: |
68 |
> |
self.copy_data = cfg_params["USER.copy_data"] |
69 |
> |
if int(self.copy_data) == 1: |
70 |
> |
try: |
71 |
> |
self.SE = cfg_params['USER.storage_element'] |
72 |
> |
self.SE_PATH = cfg_params['USER.storage_path'] |
73 |
> |
except KeyError: |
74 |
> |
msg = "Error. The [USER] section does not have 'storage_element'" |
75 |
> |
msg = msg + " and/or 'storage_path' entries, necessary to copy the output" |
76 |
> |
common.logger.message(msg) |
77 |
> |
raise CrabException(msg) |
78 |
> |
except KeyError: self.copy_data = 0 |
79 |
> |
|
80 |
> |
if ( int(self.return_data) == 0 and int(self.copy_data) == 0 ): |
81 |
> |
msg = 'Warning: return_data = 0 and copy_data = 0 ==> your exe output will be lost\n' |
82 |
> |
msg = msg + 'Please modify return_data and copy_data value in your crab.cfg file\n' |
83 |
> |
raise CrabException(msg) |
84 |
> |
|
85 |
> |
try: |
86 |
> |
self.lfc_host = cfg_params['EDG.lfc_host'] |
87 |
|
except KeyError: |
88 |
< |
msg = 'EDG.virtual_organization is mandatory.' |
88 |
> |
msg = "Error. The [EDG] section does not have 'lfc_host' value" |
89 |
> |
msg = msg + " it's necessary to know the LFC host name" |
90 |
> |
common.logger.message(msg) |
91 |
|
raise CrabException(msg) |
92 |
+ |
try: |
93 |
+ |
self.lcg_catalog_type = cfg_params['EDG.lcg_catalog_type'] |
94 |
+ |
except KeyError: |
95 |
+ |
msg = "Error. The [EDG] section does not have 'lcg_catalog_type' value" |
96 |
+ |
msg = msg + " it's necessary to know the catalog type" |
97 |
+ |
common.logger.message(msg) |
98 |
+ |
raise CrabException(msg) |
99 |
+ |
try: |
100 |
+ |
self.lfc_home = cfg_params['EDG.lfc_home'] |
101 |
+ |
except KeyError: |
102 |
+ |
msg = "Error. The [EDG] section does not have 'lfc_home' value" |
103 |
+ |
msg = msg + " it's necessary to know the home catalog dir" |
104 |
+ |
common.logger.message(msg) |
105 |
+ |
raise CrabException(msg) |
106 |
+ |
|
107 |
+ |
try: |
108 |
+ |
self.register_data = cfg_params["USER.register_data"] |
109 |
+ |
if int(self.register_data) == 1: |
110 |
+ |
try: |
111 |
+ |
self.LFN = cfg_params['USER.lfn_dir'] |
112 |
+ |
except KeyError: |
113 |
+ |
msg = "Error. The [USER] section does not have 'lfn_dir' value" |
114 |
+ |
msg = msg + " it's necessary for LCF registration" |
115 |
+ |
common.logger.message(msg) |
116 |
+ |
raise CrabException(msg) |
117 |
+ |
except KeyError: self.register_data = 0 |
118 |
+ |
|
119 |
+ |
if ( int(self.copy_data) == 0 and int(self.register_data) == 1 ): |
120 |
+ |
msg = 'Warning: register_data = 1 must be used with copy_data = 1\n' |
121 |
+ |
msg = msg + 'Please modify copy_data value in your crab.cfg file\n' |
122 |
+ |
common.logger.message(msg) |
123 |
+ |
raise CrabException(msg) |
124 |
|
|
125 |
< |
|
126 |
< |
#self.scripts_dir = common.bin_dir + '/scripts' |
127 |
< |
#self.cmd_prefix = 'edg' |
128 |
< |
#if common.LCG_version == '0' : self.cmd_prefix = 'dg' |
125 |
> |
try: self.EDG_requirements = cfg_params['EDG.requirements'] |
126 |
> |
except KeyError: self.EDG_requirements = '' |
127 |
> |
|
128 |
> |
try: self.EDG_retry_count = cfg_params['EDG.retry_count'] |
129 |
> |
except KeyError: self.EDG_retry_count = '' |
130 |
> |
|
131 |
> |
try: self.EDG_clock_time = cfg_params['EDG.max_wall_clock_time'] |
132 |
> |
except KeyError: self.EDG_clock_time= '' |
133 |
> |
|
134 |
> |
try: self.EDG_cpu_time = cfg_params['EDG.max_cpu_time'] |
135 |
> |
except KeyError: self.EDG_cpu_time = '' |
136 |
|
|
137 |
|
# Add EDG_WL_LOCATION to the python path |
138 |
|
|
147 |
|
libPath=os.path.join(path, "lib", "python") |
148 |
|
sys.path.append(libPath) |
149 |
|
|
150 |
< |
self.checkProxy_() |
150 |
> |
self.proxyValid=0 |
151 |
|
return |
152 |
|
|
153 |
+ |
|
154 |
+ |
def sched_parameter(self):
    """
    Write the file with scheduler-specific parameters, if one is needed.

    When both the RB configuration file and the VO configuration file
    are set, a 'sched_param.clad' file naming them is written into the
    share directory of the current work space and its name is stored in
    self.param.

    Returns 1 when the file was written, 0 otherwise.
    """
    # Both values must be non-empty; plain truthiness also rejects None,
    # which would otherwise fail in the string concatenation below.
    if not (self.edg_config and self.edg_config_vo):
        return 0

    self.param = 'sched_param.clad'
    param_file = open(common.work_space.shareDir() + '/' + self.param, 'w')
    try:
        param_file.write('RBconfig = "' + self.edg_config + '";\n')
        param_file.write('RBconfigVO = "' + self.edg_config_vo + '";')
    finally:
        # Release the handle even when a write fails.
        param_file.close()
    return 1
168 |
+ |
|
169 |
|
def wsSetupEnvironment(self):
    """
    Build the scheduler-specific environment section of the job script.

    Side effect: when copy_data is 1 and SE_PATH is non-empty, a trailing
    '/' is appended to self.SE_PATH if it is missing.

    Returns the shell text as one string.
    """
    # NOTE(review): block nesting was reconstructed from a rendering that
    # had lost its indentation -- confirm against the repository copy.
    parts = []

    if int(self.copy_data) == 1:
        if self.SE:
            parts.append('export SE=' + self.SE + '\n')
            parts.append('echo "SE = $SE"\n')
        if self.SE_PATH:
            if not self.SE_PATH.endswith('/'):
                self.SE_PATH = self.SE_PATH + '/'
            parts.append('export SE_PATH=' + self.SE_PATH + '\n')
            parts.append('echo "SE_PATH = $SE_PATH"\n')

    parts.append('export VO=' + self.VO + '\n')

    # LFC catalog settings: each variable is exported only when its
    # current value in the job environment differs from the configured one.
    catalog_settings = (
        ('LCG_CATALOG_TYPE', self.lcg_catalog_type, ' '),
        ('LFC_HOST', self.lfc_host, ''),
        ('LFC_HOME', self.lfc_home, ''),
    )
    for var_name, wanted, pad in catalog_settings:
        parts.append('if [[ $' + var_name + ' != \'' + wanted + '\' ]]; then\n')
        parts.append(pad + 'export ' + var_name + '=' + wanted + '\n')
        parts.append('fi\n')

    if int(self.register_data) == 1:
        parts.append('export LFN=' + self.LFN + '\n')
        parts.append('lfc-ls $LFN\n')
        parts.append('result=$?\n')
        parts.append('echo $result\n')
        # Create the LFN directory in the catalog when listing it failed.
        parts.append('if [ $result != 0 ]; then\n')
        parts.append(' lfc-mkdir $LFN\n')
        parts.append(' result=$?\n')
        parts.append(' echo $result\n')
        parts.append('fi\n')

    parts.append('\n')
    parts.append('CloseCEs=`edg-brokerinfo getCE`\n')
    parts.append('echo "CloseCEs = $CloseCEs"\n')
    parts.append('CE=`echo $CloseCEs | sed -e "s/:.*//"`\n')
    parts.append('echo "CE = $CE"\n')
    return ''.join(parts)
213 |
|
|
214 |
< |
def loggingInfo(self, nj): |
214 |
> |
def wsCopyInput(self):
    """
    Build the job-script section that copies input data from the SE to
    the worker node.

    self.copy_input_data is refreshed from
    common.analisys_common_info['copy_input_data'] (0 when the key is
    absent). When it equals 1 the returned text holds two shell loops,
    one over $cur_file_list and one over $cur_pu_list, followed by a
    'df -h' call; otherwise an empty string is returned.
    """
    # NOTE(review): the extent of the 'if' body was reconstructed from a
    # rendering without indentation -- confirm against the repository copy.
    txt = ''
    try:
        self.copy_input_data = common.analisys_common_info['copy_input_data']
    except KeyError:
        self.copy_input_data = 0

    if int(self.copy_input_data) == 1:
        txt += '#\n'
        txt += '# Copy Input Data from SE to this WN\n'
        txt += '#\n'
        # Loop so that more than one input file per job can be copied.
        txt += 'for input_file in $cur_file_list \n'
        txt += 'do \n'
        txt += ' lcg-cp --vo $VO lfn:$input_lfn/$input_file file:`pwd`/$input_file 2>&1\n'
        txt += ' copy_input_exit_status=$?\n'
        txt += ' echo "COPY_INPUT_EXIT_STATUS = $copy_input_exit_status"\n'
        txt += ' if [ $copy_input_exit_status -ne 0 ]; then \n'
        txt += ' echo "Problems with copying to WN" \n'
        txt += ' else \n'
        txt += ' echo "input copied into WN" \n'
        txt += ' fi \n'
        txt += 'done \n'
        # Copy the set of PU ntuples listed in $cur_pu_list.
        txt += 'for file in $cur_pu_list \n'
        txt += 'do \n'
        txt += ' lcg-cp --vo $VO lfn:$pu_lfn/$file file:`pwd`/$file 2>&1\n'
        # Store the status under the name that is echoed and tested
        # below; the previous text assigned copy_input_exit_status here,
        # leaving copy_input_pu_exit_status unset.
        txt += ' copy_input_pu_exit_status=$?\n'
        txt += ' echo "COPY_INPUT_PU_EXIT_STATUS = $copy_input_pu_exit_status"\n'
        txt += ' if [ $copy_input_pu_exit_status -ne 0 ]; then \n'
        txt += ' echo "Problems with copying pu to WN" \n'
        txt += ' else \n'
        txt += ' echo "input pu files copied into WN" \n'
        txt += ' fi \n'
        txt += 'done \n'
        txt += '\n'
        txt += '### Check SCRATCH space available on WN : \n'
        txt += 'df -h \n'
    return txt
255 |
> |
|
256 |
> |
def wsCopyOutput(self):
    """
    Build the job-script section that copies produced output files to
    the storage element.

    Returns an empty string unless copy_data is 1. The generated shell
    code runs only when $exe_result is 0, copies every file in
    $file_list with lcg-cp and appends the stage-out status lines to
    $RUNTIME_AREA/$repo.
    """
    txt = ''
    if int(self.copy_data) == 1:
        txt += '#\n'
        txt += '# Copy output to SE = $SE\n'
        txt += '#\n'
        txt += 'if [ $exe_result -eq 0 ]; then\n'
        txt += ' for out_file in $file_list ; do\n'
        txt += ' echo "Trying to copy output file to $SE "\n'
        # Use the exported $VO rather than a hard-coded "cms", in line
        # with the other lcg-* calls generated by this class.
        txt += ' echo "lcg-cp --vo $VO -t 1200 file://`pwd`/$out_file gsiftp://${SE}${SE_PATH}$out_file"\n'
        # The inner backticks are backslash-escaped for the shell because
        # they are nested inside the outer command substitution.
        txt += ' exitstring=`lcg-cp --vo $VO -t 1200 file://\\`pwd\\`/$out_file gsiftp://${SE}${SE_PATH}$out_file 2>&1`\n'
        txt += ' copy_exit_status=$?\n'
        txt += ' echo "COPY_EXIT_STATUS = $copy_exit_status"\n'
        txt += ' echo "STAGE_OUT = $copy_exit_status"\n'
        txt += ' if [ $copy_exit_status -ne 0 ]; then\n'
        txt += ' echo "Problems with SE = $SE"\n'
        txt += ' echo "StageOutExitStatus = 198" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' echo "StageOutExitStatusReason = $exitstring" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' else\n'
        txt += ' echo "StageOutSE = $SE" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' echo "StageOutCatalog = " | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' echo "output copied into $SE/$SE_PATH directory"\n'
        txt += ' echo "StageOutExitStatus = 0" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' fi\n'
        txt += ' done\n'
        txt += 'fi\n'
    return txt
289 |
> |
|
290 |
> |
def wsRegisterOutput(self):
    """
    Build the job-script section that registers the output files in LFC.

    Returns an empty string unless register_data is 1. The generated
    shell code has three branches: register with lcg-rf (sfn, then srm
    on failure) when both the executable and the copy succeeded; copy
    and register to the first close SE with lcg-cr when only the copy
    failed; otherwise report a problem with the executable.
    """
    if int(self.register_data) != 1:
        return ''

    script_lines = (
        '#',
        '# Register output to LFC',
        '#',
        'if [[ $exe_result -eq 0 && $copy_exit_status -eq 0 ]]; then',
        ' for out_file in $file_list ; do',
        ' echo "Trying to register the output file into LFC"',
        ' echo "lcg-rf -l $LFN/$out_file --vo $VO sfn://$SE$SE_PATH/$out_file"',
        ' lcg-rf -l $LFN/$out_file --vo $VO sfn://$SE$SE_PATH/$out_file 2>&1 ',
        ' register_exit_status=$?',
        ' echo "REGISTER_EXIT_STATUS = $register_exit_status"',
        ' echo "STAGE_OUT = $register_exit_status"',
        ' if [ $register_exit_status -ne 0 ]; then ',
        ' echo "Problems with the registration to LFC" ',
        ' echo "Try with srm protocol" ',
        ' echo "lcg-rf -l $LFN/$out_file --vo $VO srm://$SE$SE_PATH/$out_file"',
        ' lcg-rf -l $LFN/$out_file --vo $VO srm://$SE$SE_PATH/$out_file 2>&1 ',
        ' register_exit_status=$?',
        ' echo "REGISTER_EXIT_STATUS = $register_exit_status"',
        ' echo "STAGE_OUT = $register_exit_status"',
        ' if [ $register_exit_status -ne 0 ]; then ',
        ' echo "Problems with the registration into LFC" ',
        ' fi ',
        ' else ',
        ' echo "output registered to LFC"',
        ' fi ',
        ' echo "StageOutExitStatus = $register_exit_status" | tee -a $RUNTIME_AREA/$repo',
        ' done',
        'elif [[ $exe_result -eq 0 && $copy_exit_status -ne 0 ]]; then ',
        ' echo "Trying to copy output file to CloseSE"',
        ' CLOSE_SE=`edg-brokerinfo getCloseSEs | head -1`',
        ' for out_file in $file_list ; do',
        ' echo "lcg-cr -v -l lfn:${LFN}/$out_file -d $CLOSE_SE -P $LFN/$out_file --vo $VO file://`pwd`/$out_file" ',
        ' lcg-cr -v -l lfn:${LFN}/$out_file -d $CLOSE_SE -P $LFN/$out_file --vo $VO file://`pwd`/$out_file 2>&1 ',
        ' register_exit_status=$?',
        ' echo "REGISTER_EXIT_STATUS = $register_exit_status"',
        ' echo "STAGE_OUT = $register_exit_status"',
        ' if [ $register_exit_status -ne 0 ]; then ',
        ' echo "Problems with CloseSE" ',
        ' else ',
        ' echo "The program was successfully executed"',
        ' echo "SE = $CLOSE_SE"',
        ' echo "LFN for the file is LFN=${LFN}/$out_file"',
        ' fi ',
        ' echo "StageOutExitStatus = $register_exit_status" | tee -a $RUNTIME_AREA/$repo',
        ' done',
        'else',
        ' echo "Problem with the executable"',
        'fi ',
    )
    # Every script line, the last one included, ends with a newline.
    return '\n'.join(script_lines) + '\n'
346 |
> |
|
347 |
> |
def loggingInfo(self, id):
    """
    Retrieve the logging info of a job from logging and bookkeeping.

    id -- job identifier appended to the edg-job-get-logging-info call.
    Returns whatever runCommand returns for that command.
    """
    self.checkProxy()
    return runCommand('edg-job-get-logging-info -v 2 ' + id)
356 |
|
|
357 |
|
def listMatch(self, nj):
    """
    Check the compatibility of available resources for job number nj.

    Runs edg-job-list-match on the job's JDL file. Raises CrabException
    when the command yields no output; otherwise returns the value
    computed by parseListMatch_ from that output.
    """
    self.checkProxy()
    jdl_file = common.job_list[nj].jdlFilename()
    command = 'edg-job-list-match ' + self.configOpt_() + jdl_file
    output = runCommand(command, 0, 10)
    if output:
        return self.parseListMatch_(output, jdl_file)
    raise CrabException("ERROR: " + command + " failed!")
369 |
|
|
370 |
|
def parseListMatch_(self, out, jdl):
    """
    Parse the output of edg-job-list-match and count the matching sites.

    out -- the command output as a single string.
    jdl -- name of the JDL file; accepted for interface compatibility
           and not used here.

    Returns 0 as soon as a line reports that no Computing Element
    matches. Otherwise every line after the 'CEId' header that contains
    a ':' contributes the text before its first ':' as a site, and the
    number of distinct sites is returned.
    """
    reVO = re.compile(r'Selected Virtual Organisation name.*')
    reRB = re.compile(r'Connecting to host')
    reNO = re.compile(r'No Computing Element matching')
    reCEId = re.compile(r'CEId.*')

    CEs = []
    in_ce_list = False
    for raw_line in out.splitlines():
        # Strip first: the patterns below are anchored at the start of
        # the line, so leading blanks would otherwise hide a match.
        line = raw_line.strip()

        if reNO.match(line):
            common.logger.debug(5, line)
            return 0

        found = reVO.match(line)
        if found:
            common.logger.debug(5, "VO " + found.group())

        found = reRB.match(line)
        if found:
            common.logger.debug(5, "RB " + found.group())

        if in_ce_list and ':' in line:
            CEs.append(line.split(':')[0])

        if reCEId.search(line):
            # Everything below the header is a candidate CE line.
            in_ce_list = True

    common.logger.debug(5, "All CE :" + str(CEs))

    # Drop duplicates while keeping the order of first appearance.
    sites = []
    for ce in CEs:
        if ce not in sites:
            sites.append(ce)

    common.logger.debug(5, "All Sites :" + str(sites))
    return len(sites)
426 |
|
|
427 |
|
def noMatchFound_(self, jdl): |
428 |
|
reReq = re.compile( r'Requirements' ) |
448 |
|
Submit one EDG job. |
449 |
|
""" |
450 |
|
|
451 |
+ |
self.checkProxy() |
452 |
|
jid = None |
453 |
|
jdl = common.job_list[nj].jdlFilename() |
454 |
< |
id_tmp = tempfile.mktemp() |
455 |
< |
edg_ui_cfg_opt = ' ' |
173 |
< |
if self.edg_config: |
174 |
< |
edg_ui_cfg_opt = ' -c ' + self.edg_config + ' ' |
175 |
< |
if self.edg_config_vo: |
176 |
< |
edg_ui_cfg_opt += ' --config-vo ' + self.edg_config_vo + ' ' |
177 |
< |
cmd = 'edg-job-submit -o ' + id_tmp + edg_ui_cfg_opt + jdl |
454 |
> |
|
455 |
> |
cmd = 'edg-job-submit ' + self.configOpt_() + jdl |
456 |
|
cmd_out = runCommand(cmd) |
457 |
|
if cmd_out != None: |
458 |
< |
idfile = open(id_tmp) |
459 |
< |
jid_line = idfile.readline() |
182 |
< |
while jid_line[0] == '#': |
183 |
< |
jid_line = idfile.readline() |
184 |
< |
pass |
185 |
< |
jid = string.strip(jid_line) |
186 |
< |
os.unlink(id_tmp) |
458 |
> |
reSid = re.compile( r'https.+' ) |
459 |
> |
jid = reSid.search(cmd_out).group() |
460 |
|
pass |
461 |
|
return jid |
462 |
|
|
463 |
+ |
def resubmit(self, nj_list):
    """
    Hook for preparing jobs that are about to be submitted again.

    nj_list is accepted but not used; nothing is done and None is
    returned.
    """
    return None
468 |
+ |
|
469 |
+ |
def getExitStatus(self, id):
    """
    Return the 'exit_code' status attribute of the job with the given id.
    """
    exit_code = self.getStatusAttribute_(id, 'exit_code')
    return exit_code
471 |
+ |
|
472 |
|
def queryStatus(self, id):
    """
    Return the 'status' status attribute of the job with the given id.
    """
    current_status = self.getStatusAttribute_(id, 'status')
    return current_status
474 |
+ |
|
475 |
+ |
def queryDest(self, id):
    """
    Return the 'destination' status attribute of the job with the given id.
    """
    destination = self.getStatusAttribute_(id, 'destination')
    return destination
477 |
+ |
|
478 |
+ |
|
479 |
+ |
def getStatusAttribute_(self, id, attr):
    """
    Query one status attribute of the job with the given id.

    The status is read through the Status class of
    edg_wl_userinterface_common_LbWrapper instead of running
    edg-job-status.

    id   -- job identifier passed to Status.getStatus.
    attr -- one of the names in self.states; its position in that list
            selects the entry of the loaded status record.

    Returns None when the API reports an error, otherwise the selected
    entry. A name missing from self.states raises ValueError (from
    list.index).
    """
    self.checkProxy()
    Status = importName('edg_wl_userinterface_common_LbWrapper', 'Status')
    level = 0
    st = 0
    # Instance of the Status class provided by the LB API.
    jobStat = Status()
    jobStat.getStatus(id, level)
    err, apiMsg = jobStat.get_error()
    if err:
        common.logger.debug(5, 'Error caught' + apiMsg)
        return None

    # Load the record once and pick the requested field; the earlier
    # code reloaded it for every known field to fill a table that was
    # never read.
    # NOTE(review): assumes loadStatus has no side effects that depend on
    # how often it is called -- confirm against the LB wrapper.
    record = jobStat.loadStatus(st)
    return record[self.states.index(attr)]
502 |
|
|
503 |
|
def queryDetailedStatus(self, id): |
504 |
|
""" Query a detailed status of the job with id """ |
512 |
|
Returns the name of directory with results. |
513 |
|
""" |
514 |
|
|
515 |
+ |
self.checkProxy() |
516 |
|
cmd = 'edg-job-get-output --dir ' + common.work_space.resDir() + ' ' + id |
517 |
|
cmd_out = runCommand(cmd) |
518 |
|
|
524 |
|
|
525 |
|
def cancel(self, id):
    """
    Cancel the EDG job with the given id.

    Returns whatever runCommand returns for the
    'edg-job-cancel --noint' call.
    """
    self.checkProxy()
    return runCommand('edg-job-cancel --noint ' + id)
531 |
|
|
532 |
< |
def checkProxy_(self): |
237 |
< |
""" |
238 |
< |
Function to check the Globus proxy. |
239 |
< |
""" |
240 |
< |
cmd = 'grid-proxy-info -timeleft' |
241 |
< |
cmd_out = runCommand(cmd) |
242 |
< |
ok = 1 |
243 |
< |
timeleft = -999 |
244 |
< |
try: timeleft = int(cmd_out) |
245 |
< |
except ValueError: ok=0 |
246 |
< |
except TypeError: ok=0 |
247 |
< |
if timeleft < 1: ok=0 |
248 |
< |
|
249 |
< |
if ok==0: |
250 |
< |
msg = 'No valid proxy found !\n' |
251 |
< |
msg += "Please do 'grid-proxy-init'." |
252 |
< |
raise CrabException(msg) |
253 |
< |
return |
254 |
< |
|
255 |
< |
def createJDL(self, nj): |
532 |
> |
def createSchScript(self, nj): |
533 |
|
""" |
534 |
|
Create a JDL-file for EDG. |
535 |
|
""" |
536 |
|
|
537 |
|
job = common.job_list[nj] |
538 |
|
jbt = job.type() |
262 |
– |
# jbt.loadJobInfo() |
539 |
|
inp_sandbox = jbt.inputSandbox(nj) |
540 |
|
out_sandbox = jbt.outputSandbox(nj) |
541 |
< |
inp_storage_subdir = ''#jbt.inputStorageSubdir() |
541 |
> |
inp_storage_subdir = '' |
542 |
|
|
543 |
|
title = '# This JDL was generated by '+\ |
544 |
|
common.prog_name+' (version '+common.prog_version_str+')\n' |
545 |
|
jt_string = '' |
546 |
+ |
|
547 |
+ |
|
548 |
|
|
549 |
|
SPL = inp_storage_subdir |
550 |
|
if ( SPL and SPL[-1] != '/' ) : SPL = SPL + '/' |
557 |
|
jdl.write('Executable = "' + os.path.basename(script) +'";\n') |
558 |
|
jdl.write(jt_string) |
559 |
|
|
560 |
+ |
### only one .sh JDL has arguments: |
561 |
+ |
firstEvent = common.jobDB.firstEvent(nj) |
562 |
+ |
maxEvents = common.jobDB.maxEvents(nj) |
563 |
+ |
jdl.write('Arguments = "' + str(nj+1)+' '+str(firstEvent)+' '+str(maxEvents)+'";\n') |
564 |
+ |
|
565 |
|
inp_box = 'InputSandbox = { ' |
566 |
|
inp_box = inp_box + '"' + script + '",' |
567 |
|
|
574 |
|
#if common.use_jam: |
575 |
|
# inp_box = inp_box+' "'+common.bin_dir+'/'+common.run_jam+'",' |
576 |
|
|
577 |
< |
for addFile in jbt.additional_inbox_files: |
578 |
< |
addFile = os.path.abspath(addFile) |
579 |
< |
inp_box = inp_box+' "'+addFile+'",' |
580 |
< |
pass |
577 |
> |
# Marco (VERY TEMPORARY ML STUFF) |
578 |
> |
inp_box = inp_box+' "' + os.path.abspath(os.environ['CRABDIR']+'/python/'+'report.py') + '", "' +\ |
579 |
> |
os.path.abspath(os.environ['CRABDIR']+'/python/'+'DashboardAPI.py') + '", "'+\ |
580 |
> |
os.path.abspath(os.environ['CRABDIR']+'/python/'+'Logger.py') + '", "'+\ |
581 |
> |
os.path.abspath(os.environ['CRABDIR']+'/python/'+'ProcInfo.py') + '", "'+\ |
582 |
> |
os.path.abspath(os.environ['CRABDIR']+'/python/'+'apmon.py') + '"' |
583 |
> |
# End Marco |
584 |
> |
|
585 |
> |
if (not jbt.additional_inbox_files == []): |
586 |
> |
inp_box = inp_box + ', ' |
587 |
> |
for addFile in jbt.additional_inbox_files: |
588 |
> |
addFile = os.path.abspath(addFile) |
589 |
> |
inp_box = inp_box+' "'+addFile+'",' |
590 |
> |
pass |
591 |
|
|
592 |
|
if inp_box[-1] == ',' : inp_box = inp_box[:-1] |
593 |
|
inp_box = inp_box + ' };\n' |
595 |
|
|
596 |
|
jdl.write('StdOutput = "' + job.stdout() + '";\n') |
597 |
|
jdl.write('StdError = "' + job.stderr() + '";\n') |
598 |
< |
|
599 |
< |
#if common.flag_return_data : |
600 |
< |
# for fl in job.outputDataFiles(): |
601 |
< |
# out_box = out_box + ' "' + fl + '",' |
602 |
< |
# pass |
603 |
< |
# pass |
604 |
< |
|
605 |
< |
out_box = 'OutputSandbox = { ' |
606 |
< |
if out_sandbox != None: |
607 |
< |
for fl in out_sandbox: |
608 |
< |
out_box = out_box + ' "' + fl + '",' |
598 |
> |
|
599 |
> |
|
600 |
> |
if job.stdout() == job.stderr(): |
601 |
> |
out_box = 'OutputSandbox = { "' + \ |
602 |
> |
job.stdout() + '", ".BrokerInfo",' |
603 |
> |
else: |
604 |
> |
out_box = 'OutputSandbox = { "' + \ |
605 |
> |
job.stdout() + '", "' + \ |
606 |
> |
job.stderr() + '", ".BrokerInfo",' |
607 |
> |
|
608 |
> |
if int(self.return_data) == 1: |
609 |
> |
if out_sandbox != None: |
610 |
> |
for fl in out_sandbox: |
611 |
> |
out_box = out_box + ' "' + fl + '",' |
612 |
> |
pass |
613 |
|
pass |
614 |
|
pass |
615 |
< |
|
615 |
> |
|
616 |
|
if out_box[-1] == ',' : out_box = out_box[:-1] |
617 |
|
out_box = out_box + ' };' |
618 |
|
jdl.write(out_box+'\n') |
619 |
|
|
323 |
– |
# If CloseCE is used ... |
324 |
– |
#if common.flag_usecloseCE and job.inputDataFiles(): |
325 |
– |
# indata = 'InputData = { ' |
326 |
– |
# for fl in job.inputDataFiles(): |
327 |
– |
# indata = indata + ' "lfn:' + SPL + fl + '",' |
328 |
– |
# if indata[-1] == ',' : indata = indata[:-1] |
329 |
– |
# indata = indata + ' };' |
330 |
– |
# jdl.write(indata+'\n') |
331 |
– |
# jdl.write('DataAccessProtocol = { "gsiftp" };\n') |
332 |
– |
|
333 |
– |
if common.analisys_common_info['sites']: |
334 |
– |
if common.analisys_common_info['sw_version']: |
335 |
– |
|
336 |
– |
req='Requirements = ' |
337 |
– |
### First ORCA version |
338 |
– |
req=req + 'Member("VO-cms-' + \ |
339 |
– |
common.analisys_common_info['sw_version'] + \ |
340 |
– |
'", other.GlueHostApplicationSoftwareRunTimeEnvironment)' |
341 |
– |
## then sites |
342 |
– |
if len(common.analisys_common_info['sites'])>0: |
343 |
– |
req = req + ' && (' |
344 |
– |
for i in range(len(common.analisys_common_info['sites'])): |
345 |
– |
req = req + 'other.GlueCEInfoHostName == "' \ |
346 |
– |
+ common.analisys_common_info['sites'][i] + '"' |
347 |
– |
if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ): |
348 |
– |
req = req + ' || ' |
349 |
– |
req = req + ')' |
350 |
– |
## then user requirement |
351 |
– |
if self.EDG_requirements: |
352 |
– |
req = req + ' && ' + self.EDG_requirements |
353 |
– |
req = req + ';\n' |
354 |
– |
jdl.write(req) |
620 |
|
|
621 |
+ |
req='Requirements = ' |
622 |
+ |
req = req + jbt.getRequirements() |
623 |
+ |
|
624 |
+ |
if self.EDG_requirements: |
625 |
+ |
if (req == 'Requirement = '): |
626 |
+ |
req = req + self.EDG_requirements |
627 |
+ |
else: |
628 |
+ |
req = req + ' && ' + self.EDG_requirements |
629 |
+ |
#### FEDE ##### |
630 |
+ |
if self.EDG_ce_white_list: |
631 |
+ |
ce_white_list = string.split(self.EDG_ce_white_list,',') |
632 |
+ |
#print "req = ", req |
633 |
+ |
for i in range(len(ce_white_list)): |
634 |
+ |
if i == 0: |
635 |
+ |
if (req == 'Requirement = '): |
636 |
+ |
req = req + '((RegExp("' + ce_white_list[i] + '", other.GlueCEUniqueId))' |
637 |
+ |
else: |
638 |
+ |
req = req + ' && ((RegExp("' + ce_white_list[i] + '", other.GlueCEUniqueId))' |
639 |
+ |
pass |
640 |
+ |
else: |
641 |
+ |
req = req + ' || (RegExp("' + ce_white_list[i] + '", other.GlueCEUniqueId))' |
642 |
+ |
req = req + ')' |
643 |
+ |
|
644 |
+ |
if self.EDG_ce_black_list: |
645 |
+ |
ce_black_list = string.split(self.EDG_ce_black_list,',') |
646 |
+ |
for ce in ce_black_list: |
647 |
+ |
if (req == 'Requirement = '): |
648 |
+ |
req = req + '(!RegExp("' + ce + '", other.GlueCEUniqueId))' |
649 |
+ |
else: |
650 |
+ |
req = req + ' && (!RegExp("' + ce + '", other.GlueCEUniqueId))' |
651 |
+ |
pass |
652 |
+ |
############### |
653 |
+ |
if self.EDG_clock_time: |
654 |
+ |
if (req == 'Requirement = '): |
655 |
+ |
req = req + 'other.GlueCEPolicyMaxWallClockTime>='+self.EDG_clock_time |
656 |
+ |
else: |
657 |
+ |
req = req + ' && other.GlueCEPolicyMaxWallClockTime>='+self.EDG_clock_time |
658 |
+ |
|
659 |
+ |
if self.EDG_cpu_time: |
660 |
+ |
if (req == 'Requirement = '): |
661 |
+ |
req = req + ' other.GlueCEPolicyMaxCPUTime>='+self.EDG_cpu_time |
662 |
+ |
else: |
663 |
+ |
req = req + ' && other.GlueCEPolicyMaxCPUTime>='+self.EDG_cpu_time |
664 |
+ |
|
665 |
+ |
if (req != 'Requirement = '): |
666 |
+ |
req = req + ';\n' |
667 |
+ |
jdl.write(req) |
668 |
+ |
|
669 |
|
jdl.write('VirtualOrganisation = "' + self.VO + '";\n') |
670 |
|
|
671 |
|
if ( self.EDG_retry_count ): |
674 |
|
|
675 |
|
jdl.close() |
676 |
|
return |
677 |
+ |
|
678 |
+ |
def checkProxy(self):
    """
    Check the Globus proxy and create a new one when needed.

    Does nothing when self.proxyValid is already set. Otherwise
    'grid-proxy-info' is asked whether a proxy valid for at least
    minTimeLeft hours exists; if not, 'grid-proxy-init -valid 100:00'
    is run. On success self.proxyValid is set to 1.

    Raises CrabException when the proxy cannot be created.
    """
    if self.proxyValid:
        return

    minTimeLeft = 10  # in hours
    cmd = 'grid-proxy-info -e -v ' + str(minTimeLeft) + ':00'
    # os.system is used because only the exit status matters here: the
    # command exits non-zero when no valid proxy is found.
    if os.system(cmd) != 0:
        common.logger.message( "No valid proxy found or timeleft too short!\n Creating a user proxy with default length of 100h\n")
        cmd = 'grid-proxy-init -valid 100:00'
        try:
            status = os.system(cmd)
        except OSError:
            # Only operating-system failures are converted; an interrupt
            # from the user is left to propagate.
            raise CrabException("Unable to create a valid proxy!\n")
        if status != 0:
            raise CrabException("Unable to create a valid proxy!\n")

    self.proxyValid = 1
    return
703 |
+ |
|
704 |
+ |
def configOpt_(self):
    """
    Compose the configuration-file options for the edg-job-* commands.

    Returns ' -c <edg_config> ' when edg_config is set (a single blank
    otherwise), followed by ' --config-vo <edg_config_vo> ' when
    edg_config_vo is set.
    """
    pieces = []
    if self.edg_config:
        pieces.append(' -c ' + self.edg_config + ' ')
    else:
        pieces.append(' ')
    if self.edg_config_vo:
        pieces.append(' --config-vo ' + self.edg_config_vo + ' ')
    return ''.join(pieces)