1 |
mcinquil |
1.5 |
from PostMortem import PostMortem
|
2 |
|
|
|
3 |
spiga |
1.1 |
from crab_util import *
|
4 |
|
|
import common
|
5 |
mcinquil |
1.5 |
import string, os
|
6 |
|
|
|
7 |
|
|
from ProdCommon.Storage.SEAPI.SElement import SElement
|
8 |
|
|
from ProdCommon.Storage.SEAPI.SBinterface import SBinterface
|
9 |
|
|
|
10 |
spiga |
1.6 |
class PostMortemServer(PostMortem):
|
11 |
mcinquil |
1.5 |
def __init__(self, cfg_params, nj_list):
|
12 |
|
|
|
13 |
|
|
PostMortem.__init__(self, cfg_params, nj_list)
|
14 |
|
|
|
15 |
|
|
# init client server params...
|
16 |
|
|
CliServerParams(self)
|
17 |
|
|
|
18 |
|
|
if self.storage_path[0]!='/':
|
19 |
|
|
self.storage_path = '/'+self.storage_path
|
20 |
|
|
|
21 |
spiga |
1.1 |
return
|
22 |
|
|
|
23 |
mcinquil |
1.5 |
def collectLogging(self):
|
24 |
spiga |
1.6 |
# get updated status from server
|
25 |
|
|
try:
|
26 |
|
|
from StatusServer import StatusServer
|
27 |
|
|
stat = StatusServer(self.cfg_params)
|
28 |
|
|
stat.resynchClientSide()
|
29 |
|
|
except:
|
30 |
|
|
pass
|
31 |
mcinquil |
1.5 |
|
32 |
|
|
#create once storage interaction object
|
33 |
|
|
seEl = None
|
34 |
|
|
loc = None
|
35 |
|
|
try:
|
36 |
|
|
seEl = SElement(self.storage_name, self.storage_proto, self.storage_port)
|
37 |
|
|
except Exception, ex:
|
38 |
spiga |
1.8 |
common.logger.debug( str(ex))
|
39 |
mcinquil |
1.5 |
msg = "ERROR: Unable to create SE source interface \n"
|
40 |
|
|
raise CrabException(msg)
|
41 |
|
|
try:
|
42 |
|
|
loc = SElement("localhost", "local")
|
43 |
|
|
except Exception, ex:
|
44 |
spiga |
1.8 |
common.logger.debug( str(ex))
|
45 |
mcinquil |
1.5 |
msg = "ERROR: Unable to create destination interface \n"
|
46 |
|
|
raise CrabException(msg)
|
47 |
spiga |
1.1 |
|
48 |
mcinquil |
1.5 |
## coupling se interfaces
|
49 |
|
|
sbi = SBinterface( seEl, loc )
|
50 |
spiga |
1.1 |
|
51 |
mcinquil |
1.5 |
## get the list of jobs to get logging.info skimmed by failed status
|
52 |
|
|
logginable = self.skimDeadList()
|
53 |
|
|
|
54 |
|
|
## iter over each asked job and print warning if not in skimmed list
|
55 |
|
|
for id in self.nj_list:
|
56 |
|
|
if id not in self.all_jobs:
|
57 |
spiga |
1.8 |
common.logger.info('Warning: job # ' + str(id) + ' does not exist! Not possible to ask for postMortem ')
|
58 |
mcinquil |
1.5 |
continue
|
59 |
|
|
elif id in logginable:
|
60 |
|
|
fname = self.fname_base + str(id) + '.LoggingInfo'
|
61 |
|
|
if os.path.exists(fname):
|
62 |
spiga |
1.8 |
common.logger.info('Logging info for job ' + str(id) + ' already present in '+fname+'\nRemove it for update')
|
63 |
mcinquil |
1.5 |
continue
|
64 |
|
|
## retrieving & processing logging info
|
65 |
|
|
if self.retrieveFile( sbi, id, fname):
|
66 |
|
|
## decode logging info
|
67 |
|
|
fl = open(fname, 'r')
|
68 |
|
|
out = "".join(fl.readlines())
|
69 |
|
|
fl.close()
|
70 |
|
|
reason = self.decodeLogging(out)
|
71 |
spiga |
1.8 |
common.logger.info('Logging info for job '+ str(id) +': '+str(reason)+'\n written to '+str(fname)+' \n' )
|
72 |
mcinquil |
1.5 |
else:
|
73 |
slacapra |
1.9 |
common.logger.info('Logging info for job '+ str(id) +' not retrieved. Tring to get loggingInfo manually')
|
74 |
|
|
PostMortem.collectLogging(self)
|
75 |
mcinquil |
1.5 |
else:
|
76 |
spiga |
1.8 |
common.logger.info('Warning: job # ' + str(id) + ' not killed or aborted! Will get loggingInfo manually ')
|
77 |
slacapra |
1.7 |
PostMortem.collectLogging(self)
|
78 |
mcinquil |
1.5 |
return
|
79 |
spiga |
1.1 |
|
80 |
|
|
|
81 |
mcinquil |
1.5 |
def skimDeadList(self):
|
82 |
|
|
"""
|
83 |
|
|
__skimDeadList__
|
84 |
|
|
return the list of jobs really failed: K, A
|
85 |
|
|
"""
|
86 |
|
|
skimmedlist = []
|
87 |
|
|
self.up_task = common._db.getTask( self.nj_list )
|
88 |
|
|
for job in self.up_task.jobs:
|
89 |
|
|
if job.runningJob['status'] in ['K','A']:
|
90 |
|
|
skimmedlist.append(job['jobId'])
|
91 |
|
|
return skimmedlist
|
92 |
|
|
|
93 |
|
|
def retrieveFile(self, sbi, jobid, destlog):
|
94 |
|
|
"""
|
95 |
|
|
__retrieveFile__
|
96 |
spiga |
1.1 |
|
97 |
mcinquil |
1.5 |
retrieves logging.info file from the server storage area
|
98 |
|
|
"""
|
99 |
|
|
self.taskuuid = str(common._db.queryTask('name'))
|
100 |
spiga |
1.8 |
common.logger.debug( "Task name: " + self.taskuuid)
|
101 |
spiga |
1.1 |
|
102 |
mcinquil |
1.5 |
# full remote dir
|
103 |
|
|
remotedir = os.path.join(self.storage_path, self.taskuuid)
|
104 |
|
|
remotelog = remotedir + '/loggingInfo_'+str(jobid)+'.log'
|
105 |
|
|
|
106 |
spiga |
1.8 |
common.logger.info("Starting retrieving logging-info from server " \
|
107 |
mcinquil |
1.5 |
+ str(self.storage_name) + " for job " \
|
108 |
|
|
+ str(jobid) + "...")
|
109 |
spiga |
1.1 |
|
110 |
mcinquil |
1.5 |
# retrieve logging info from storage
|
111 |
spiga |
1.8 |
common.logger.debug( "retrieving "+ str(remotelog) +" to "+ str(destlog) )
|
112 |
mcinquil |
1.5 |
try:
|
113 |
|
|
sbi.copy( remotelog, destlog)
|
114 |
|
|
except Exception, ex:
|
115 |
|
|
msg = "WARNING: Unable to retrieve logging-info file %s \n"%remotelog
|
116 |
|
|
msg += str(ex)
|
117 |
spiga |
1.8 |
common.logger.debug(msg)
|
118 |
mcinquil |
1.5 |
return False
|
119 |
|
|
# cleaning remote logging info file
|
120 |
|
|
try:
|
121 |
spiga |
1.8 |
common.logger.debug( "Cleaning remote file [%s] " + str(remotelog) )
|
122 |
mcinquil |
1.5 |
sbi.delete(remotelog)
|
123 |
|
|
except Exception, ex:
|
124 |
|
|
msg = "WARNING: Unable to clean remote logging-info file %s \n"%remotelog
|
125 |
|
|
msg += str(ex)
|
126 |
spiga |
1.8 |
common.logger.debug(msg)
|
127 |
mcinquil |
1.5 |
return True
|