1 |
|
from Actor import * |
2 |
|
from crab_util import * |
3 |
– |
import EdgLoggingInfo |
4 |
– |
import CondorGLoggingInfo |
3 |
|
import common |
4 |
|
import string, os |
5 |
|
|
7 |
|
def __init__(self, cfg_params, nj_list):
    """
    Store the configuration and the list of jobs to inspect.

    cfg_params -- configuration mapping; the 'CRAB.jobtype' entry is read
                  here to build the logging-info file name prefix.
    nj_list    -- job identifiers whose logging info will be collected.
    """
    self.cfg_params = cfg_params
    self.nj_list = nj_list
    self.all_jobs = common._db.nJobs('list')

    # Prefix shared by every per-job '.LoggingInfo' file name.
    # NOTE(review): no path separator is inserted between jobDir() and the
    # job type, so jobDir() is presumably expected to end with one -- confirm.
    jobtype = self.cfg_params['CRAB.jobtype'].upper()
    self.fname_base = common.work_space.jobDir() + jobtype + '_'
15 |
|
|
16 |
|
def run(self):
    """
    The main method of the class.

    Emits a debug message and then delegates the whole job to
    collectLogging(), which processes every entry of self.nj_list.
    """
    common.logger.debug( "PostMortem::run() called")
    self.collectLogging()
23 |
|
|
24 |
< |
# run a list-match on first job |
25 |
< |
for nj in self.nj_list: |
26 |
< |
id = common.scheduler.boss_SID(nj) |
27 |
< |
out = common.scheduler.loggingInfo(id) |
28 |
< |
job = common.job_list[nj-1] |
29 |
< |
jobnum_str = '%06d' % (int(nj)) |
30 |
< |
fname = common.work_space.jobDir() + '/' + self.cfg_params['CRAB.jobtype'].upper() + '_' + jobnum_str + '.loggingInfo' |
24 |
> |
def collectOneLogging(self, id):
    """
    Fetch, store and report the scheduler logging info of a single job.

    id -- job identifier; it is passed to self.up_task.getJob() and to
          common.scheduler.loggingInfo(). The name shadows the builtin but
          is kept so that existing keyword callers keep working.

    Nothing is fetched when the job is unknown to the task, when it is
    still in the 'Created' state, or when its '.LoggingInfo' file already
    exists (the file must be removed by hand to force an update).

    Reads self.up_task, which collectLogging() assigns before it calls
    this method, and self.fname_base, which is built in __init__().
    Always returns None.
    """
    job = self.up_task.getJob(id)
    if not job:  # id not in self.all_jobs
        common.logger.info('Warning: job # ' + str(id) + ' does not exist! Not possible to ask for postMortem ')
        return
    if job.runningJob['state'] == 'Created':
        common.logger.info('Warning: job # ' + str(id) + ' just Created ! Not possible to ask for postMortem ')
        return

    fname = self.fname_base + str(id) + '.LoggingInfo'
    if os.path.exists(fname):
        common.logger.info('Logging info for job ' + str(id) + ' already present in '+fname+'\nRemove it for update')
        return

    # The scheduler receives the target path and the file is read back right
    # after the call, so it presumably writes the logging info to fname.
    common.scheduler.loggingInfo(id, fname)

    # try/finally guarantees the handle is released even if reading fails.
    fl = open(fname, 'r')
    try:
        out = fl.read()
    finally:
        fl.close()

    reason = self.decodeLogging(out)
    common.logger.info('Logging info for job '+ str(id) +' written to '+str(fname))
    common.logger.info('Reason for job status is:\n\n'+str(reason)+'\n')
    return
44 |
> |
|
45 |
|
|
46 |
+ |
def collectLogging(self):
    """
    Collect the logging info of every job listed in self.nj_list.

    The task object is requested once from common._db for the whole list
    and cached in self.up_task, where collectOneLogging() looks up each
    individual job.
    """
    self.up_task = common._db.getTask( self.nj_list )
    for job_id in self.nj_list:
        self.collectOneLogging(job_id)
    return
51 |
+ |
|
52 |
+ |
def decodeLogging(self, out):
    """
    Decode raw logging-info text by delegating to the active scheduler.

    out -- the text to decode; collectOneLogging() passes the full content
           of a job's '.LoggingInfo' file.

    Returns whatever common.scheduler.decodeLogInfo() returns for that text.
    """
    return common.scheduler.decodeLogInfo(out)
56 |
|
|