1 |
mcinquil |
1.5 |
from PostMortem import PostMortem
|
2 |
|
|
|
3 |
spiga |
1.1 |
from crab_util import *
|
4 |
|
|
import common
|
5 |
mcinquil |
1.5 |
import string, os
|
6 |
|
|
|
7 |
|
|
from ProdCommon.Storage.SEAPI.SElement import SElement
|
8 |
|
|
from ProdCommon.Storage.SEAPI.SBinterface import SBinterface
|
9 |
|
|
|
10 |
spiga |
1.6 |
class PostMortemServer(PostMortem):
    """
    __PostMortemServer__

    PostMortem specialisation that copies the logging-info files of the
    killed/aborted jobs from the server storage area to the local disk
    and decodes them.
    """

    def __init__(self, cfg_params, nj_list):
        """
        __init__

        cfg_params and nj_list are handed unchanged to PostMortem.__init__
        """
        PostMortem.__init__(self, cfg_params, nj_list)

        # init client server params...
        # NOTE(review): presumably this call is what sets storage_name,
        # storage_proto, storage_port and storage_path on self - confirm
        # against crab_util.
        CliServerParams(self)

        # the storage path is always used as an absolute one; startswith()
        # also copes with an empty path, where indexing [0] would raise
        if not self.storage_path.startswith('/'):
            self.storage_path = '/' + self.storage_path

        return

    def collectLogging(self):
        """
        __collectLogging__

        for each requested job that is killed or aborted: copy its
        logging-info file from the server storage into
        <fname_base><jobId>.LoggingInfo, decode it and report the result.
        Jobs that do not exist, are not killed/aborted or already have a
        local file only get a message.

        raises CrabException when one of the two storage interfaces
        cannot be created
        """
        # the caught exception is read through sys.exc_info() so that the
        # except clauses need no interpreter-specific binding syntax
        import sys

        # get updated status from server: this stays best effort (a failure
        # must not block the retrieval) but it now leaves a trace in the log
        try:
            from StatusServer import StatusServer
            stat = StatusServer(self.cfg_params)
            stat.resynchClientSide()
        except Exception:
            common.logger.debug(1, "Unable to resynch the status from the server: "
                                   + str(sys.exc_info()[1]))

        # create once storage interaction object
        try:
            seEl = SElement(self.storage_name, self.storage_proto, self.storage_port)
        except Exception:
            common.logger.debug(1, str(sys.exc_info()[1]))
            msg = "ERROR: Unable to create SE source interface \n"
            raise CrabException(msg)
        try:
            loc = SElement("localhost", "local")
        except Exception:
            common.logger.debug(1, str(sys.exc_info()[1]))
            msg = "ERROR: Unable to create destination interface \n"
            raise CrabException(msg)

        ## coupling se interfaces
        sbi = SBinterface(seEl, loc)

        ## get the list of jobs to get logging.info skimmed by failed status
        logginable = self.skimDeadList()

        ## iter over each asked job and print warning if not in skimmed list
        # NOTE(review): all_jobs, fname_base and decodeLogging are not defined
        # in this class - presumably inherited from PostMortem, confirm there.
        for jobid in self.nj_list:
            if jobid not in self.all_jobs:
                common.logger.message('Warning: job # ' + str(jobid) + ' does not exist! Not possible to ask for postMortem ')
                continue

            if jobid not in logginable:
                common.logger.message('Warning: job # ' + str(jobid) + ' not killed or aborted! Not possible to ask for postMortem ')
                continue

            fname = self.fname_base + str(jobid) + '.LoggingInfo'
            if os.path.exists(fname):
                # never overwrite a file that has already been retrieved
                common.logger.message('Logging info for job ' + str(jobid) + ' already present in '+fname+'\nRemove it for update')
                continue

            ## retrieving & processing logging info
            if not self.retrieveFile(sbi, jobid, fname):
                common.logger.message('Logging info for job '+ str(jobid) +' not retrieved')
                continue

            ## decode logging info
            fl = open(fname, 'r')
            try:
                out = fl.read()
            finally:
                # release the handle even when the read fails
                fl.close()
            reason = self.decodeLogging(out)
            common.logger.message('Logging info for job '+ str(jobid) +': '+str(reason)+'\n      written to '+str(fname)+' \n' )

        return

    def skimDeadList(self):
        """
        __skimDeadList__

        return the list of jobs really failed: K, A
        (the jobId of every job of the task whose running status is
        'K' or 'A'); the task read from the db is kept in self.up_task
        """
        self.up_task = common._db.getTask( self.nj_list )
        return [job['jobId'] for job in self.up_task.jobs
                if job.runningJob['status'] in ['K', 'A']]

    def retrieveFile(self, sbi, jobid, destlog):
        """
        __retrieveFile__

        retrieves logging.info file from the server storage area

        sbi     : storage interface used for the copy and the delete
        jobid   : id of the job, used to build the remote file name
        destlog : local path the remote file is copied to

        return True when the file has been copied, False otherwise; a
        failure while removing the remote copy is only logged
        """
        # the caught exception is read through sys.exc_info() so that the
        # except clauses need no interpreter-specific binding syntax
        import sys

        self.taskuuid = str(common._db.queryTask('name'))
        common.logger.debug(3, "Task name: " + self.taskuuid)

        # full remote dir
        remotedir = os.path.join(self.storage_path, self.taskuuid)
        remotelog = remotedir + '/loggingInfo_'+str(jobid)+'.log'

        common.logger.message("Starting retrieving logging-info from server " \
                               + str(self.storage_name) + " for job " \
                               + str(jobid) + "...")

        # retrieve logging info from storage
        common.logger.debug(1, "retrieving "+ str(remotelog) +" to "+ str(destlog) )
        try:
            sbi.copy( remotelog, destlog)
        except Exception:
            msg = "WARNING: Unable to retrieve logging-info file %s \n"%remotelog
            msg += str(sys.exc_info()[1])
            common.logger.debug(1,msg)
            return False

        # cleaning remote logging info file: the local copy is already in
        # place, so a failure here does not change the returned value
        try:
            # the file name is substituted into the [%s] placeholder; it was
            # appended after a literal "[%s]" before
            common.logger.debug(5, "Cleaning remote file [%s] " % str(remotelog) )
            sbi.delete(remotelog)
        except Exception:
            msg = "WARNING: Unable to clean remote logging-info file %s \n"%remotelog
            msg += str(sys.exc_info()[1])
            common.logger.debug(5,msg)

        return True
|