ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/PostMortemServer.py
Revision: 1.15
Committed: Tue Jul 27 15:56:13 2010 UTC (14 years, 9 months ago) by ewv
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_9_1, CRAB_2_9_1_pre2, CRAB_2_9_1_pre1, CRAB_2_9_0, CRAB_2_9_0_pre2, CRAB_2_9_0_pre1, CRAB_2_8_8, CRAB_2_8_8_pre1, CRAB_2_8_7_patch3, CRAB_2_8_7_patch2, CRAB_2_8_7_patch1, CRAB_2_8_7, CRAB_2_8_7_pre2, CRAB_2_8_7_pre1, CRAB_2_8_6, CRAB_2_8_6_pre1, CRAB_2_8_5_patch3, CRAB_2_8_5_patch2, CRAB_2_8_5_patch1, CRAB_2_8_5, CRAB_2_8_5_pre5, CRAB_2_8_5_pre4, CRAB_2_8_5_pre3, CRAB_2_8_4_patch3, CRAB_2_8_5_pre2, CRAB_2_8_4_patch2, CRAB_2_8_5_pre1, CRAB_2_8_4_patch1, CRAB_2_8_4, CRAB_2_8_4_pre5, CRAB_2_8_4_pre4, CRAB_2_8_4_pre3, CRAB_2_8_4_pre2, CRAB_2_8_4_pre1, CRAB_2_8_3, CRAB_2_8_3_pre4, CRAB_2_8_3_pre3, CRAB_2_8_3_pre2, CRAB_2_8_3_pre1, CRAB_2_8_2_patch1, CRAB_2_8_2, CRAB_2_8_2_pre5, CRAB_2_8_2_pre4, CRAB_2_8_2_pre3, CRAB_2_8_2_pre2, CRAB_2_8_2_pre1, CRAB_2_8_1, CRAB_2_8_0, CRAB_2_8_0_pre1, CRAB_2_7_10_pre3, CRAB_2_7_9_patch2_pre1, CRAB_2_7_10_pre2, CRAB_2_7_10_pre1, CRAB_2_7_9_patch1, CRAB_2_7_9, CRAB_2_7_9_pre5, CRAB_2_7_9_pre4, CRAB_2_7_9_pre3, CRAB_2_7_9_pre2, CRAB_2_7_8_patch2, CRAB_2_7_9_pre1, CRAB_2_7_8_patch2_pre1, CRAB_2_7_8_patch1, CRAB_2_7_8_patch1_pre1, CRAB_2_7_8, CRAB_2_7_8_pre3, CRAB_2_7_8_pre2, CRAB_2_7_8_dash3, CRAB_2_7_8_dash2, CRAB_2_7_8_dash, CRAB_2_7_7_patch1, CRAB_2_7_7_patch1_pre1, CRAB_2_7_8_pre1, CRAB_2_7_7, CRAB_2_7_7_pre2, CRAB_2_7_7_pre1, CRAB_2_7_6_patch1, CRAB_2_7_6, CRAB_2_7_6_pre1, CRAB_2_7_5_patch1, CRAB_2_7_5, CRAB_2_7_5_pre3, CRAB_2_7_5_pre2, CRAB_2_7_5_pre1, CRAB_2_7_4_patch1, CRAB_2_7_4, CRAB_2_7_4_pre6, CRAB_2_7_4_pre5, CRAB_2_7_4_pre4, HEAD
Changes since 1.14: +13 -13 lines
Log Message:
Pass logger to SBinterface, fix bug 59901

File Contents

# Content
1 from PostMortem import PostMortem
2
3 from crab_util import *
4 import common
5 import string, os
6
7 from ProdCommon.Storage.SEAPI.SElement import SElement
8 from ProdCommon.Storage.SEAPI.SBinterface import SBinterface
9
class PostMortemServer(PostMortem):
    """
    Client/server variant of PostMortem.

    Instead of asking the scheduler directly, this class fetches the
    per-job logging-info files that the CRAB server has staged in its
    storage area (accessed through the ProdCommon SEAPI), decodes them,
    and falls back to the base-class manual retrieval when a file
    cannot be fetched.
    """

    def __init__(self, cfg_params, nj_list):
        """
        Initialize the base PostMortem state and the client/server
        storage parameters.

        cfg_params : CRAB configuration dictionary (passed through to
                     PostMortem and CliServerParams).
        nj_list    : list of job ids for which logging info is wanted.
        """

        PostMortem.__init__(self, cfg_params, nj_list)

        # init client server params...
        # NOTE(review): CliServerParams appears to populate attributes on
        # self as a side effect (storage_name, storage_path,
        # storage_proto, storage_port are used below) -- confirm against
        # crab_util.CliServerParams.
        CliServerParams(self)

        # Timeout option string handed to SEAPI copy calls; for LSF/CAF
        # (local/CAF copies) no lcg timeout flags are wanted, so a bare
        # space is used instead.
        self.copyTout= setLcgTimeout()
        if common.scheduler.name().upper() in ['LSF', 'CAF']:
            self.copyTout= ' '

        # Ensure the remote storage path is absolute: it is later joined
        # with the task uuid to build remote file names.
        if self.storage_path[0]!='/':
            self.storage_path = '/'+self.storage_path

        return

    def collectLogging(self):
        """
        Retrieve and decode logging-info files for the requested jobs.

        For the 'globus' protocol all files are copied in a single bulk
        SEAPI copy; for every other protocol files are fetched one by
        one via retrieveFile(). Jobs that are not in a killed/aborted
        state (or whose file cannot be fetched) fall back to the
        base-class PostMortem.collectOneLogging().
        """
        # get updated status from server; this is best-effort only, so
        # any failure (import error, server unreachable, ...) is
        # deliberately ignored and we proceed with the cached status.
        try:
            from StatusServer import StatusServer
            stat = StatusServer(self.cfg_params)
            warning_msg = stat.resynchClientSide()
            if warning_msg is not None:
                common.logger.info(warning_msg)
        except:
            pass

        #create once storage interaction object
        seEl = None
        loc = None
        # Source: the server storage element hosting the logging files.
        try:
            seEl = SElement(self.storage_name, self.storage_proto, self.storage_port)
        except Exception, ex:
            common.logger.debug( str(ex))
            msg = "ERROR: Unable to create SE source interface \n"
            raise CrabException(msg)
        # Destination: the local machine.
        try:
            loc = SElement("localhost", "local")
        except Exception, ex:
            common.logger.debug( str(ex))
            msg = "ERROR: Unable to create destination interface \n"
            raise CrabException(msg)

        ## coupling se interfaces
        # Logger is passed explicitly (fix for bug 59901, see revision log).
        sbi = SBinterface(seEl, loc, logger = common.logger.logger)

        ## get the list of jobs to get logging.info skimmed by failed status
        # (only jobs in state 'K'illed or 'A'borted are retrievable here)
        logginable = self.skimDeadList()

        if self.storage_proto in ['globus']:
            # Bulk path: warn about jobs we cannot serve, then copy all
            # retrievable files in one SEAPI call.
            for id in self.nj_list:
                if id not in self.all_jobs:
                    common.logger.info('Warning: job # ' + str(id) + ' does not exist! Not possible to ask for postMortem ')
                elif id not in logginable:
                    common.logger.info('Warning: job # ' + str(id) + ' not killed or aborted! Will get loggingInfo manually ')
                    PostMortem.collectOneLogging(self,id)
            # construct a list of absolute paths of input files
            # and the destinations to copy them to
            sourcesList = []
            destsList = []
            self.taskuuid = str(common._db.queryTask('name'))
            common.logger.debug( "Starting globus retrieval for task name: " + self.taskuuid)
            remotedir = os.path.join(self.storage_path, self.taskuuid)
            for i in logginable:
                remotelog = remotedir + '/loggingInfo_'+str(i)+'.log'
                sourcesList.append(remotelog)
                fname = self.fname_base + str(i) + '.LoggingInfo'
                destsList.append(fname)

            # try to do the copy
            copy_res = None
            try:
                copy_res = sbi.copy( sourcesList, destsList, opt=self.copyTout)
            except Exception, ex:
                msg = "WARNING: Unable to retrieve logging" #ainfo file %s \n" % osbFiles[i]
                msg += str(ex)
                common.logger.debug(msg)
                import traceback
                common.logger.debug( str(traceback.format_exc()) )
            if copy_res is not None:
                ## evaluating copy results
                # Python 2 idiom: map(None, a, b) zips the two lists
                # (padding with None); copy_res and sourcesList are
                # expected to be parallel, index `count` tracks the
                # matching entries of logginable/destsList.
                copy_err_list = []
                count = 0
                for ll in map(None, copy_res, sourcesList):
                    # NOTE(review): ll[0] is the per-file copy result;
                    # int(ll[0][0]) reads its first element as the exit
                    # code -- exact result shape depends on SEAPI
                    # SBinterface.copy, confirm there.
                    exitcode = int(ll[0][0])
                    if exitcode == 0:
                        ## decode logging info
                        fl = open(destsList[count], 'r')
                        out = "".join(fl.readlines())
                        fl.close()
                        reason = self.decodeLogging(out)
                        common.logger.info('Logging info for job '+ str(logginable[count]) +': '+str(reason)+'\n written to '+str(destsList[count])+' \n' )
                    else:
                        # Copy failed for this job: fall back to the
                        # base-class manual retrieval.
                        common.logger.info('Logging info for job '+ str(logginable[count]) +' not retrieved. Tring to get loggingInfo manually')
                        PostMortem.collectOneLogging(self,logginable[count])
                    count += 1
        else:
            ## iter over each asked job and print warning if not in skimmed list
            for id in self.nj_list:
                if id not in self.all_jobs:
                    common.logger.info('Warning: job # ' + str(id) + ' does not exist! Not possible to ask for postMortem ')
                    continue
                elif id in logginable:
                    fname = self.fname_base + str(id) + '.LoggingInfo'
                    # Never overwrite an already-downloaded file: the
                    # user must remove it to force a refresh.
                    if os.path.exists(fname):
                        common.logger.info('Logging info for job ' + str(id) + ' already present in '+fname+'\nRemove it for update')
                        continue
                    ## retrieving & processing logging info
                    if self.retrieveFile( sbi, id, fname):
                        ## decode logging info
                        fl = open(fname, 'r')
                        out = "".join(fl.readlines())
                        fl.close()
                        reason = self.decodeLogging(out)
                        common.logger.info('Logging info for job '+ str(id) +': '+str(reason)+'\n written to '+str(fname)+' \n' )
                    else:
                        common.logger.info('Logging info for job '+ str(id) +' not retrieved. Tring to get loggingInfo manually')
                        PostMortem.collectOneLogging(self,id)
                else:
                    common.logger.info('Warning: job # ' + str(id) + ' not killed or aborted! Will get loggingInfo manually ')
                    PostMortem.collectOneLogging(self,id)
        return

    def skimDeadList(self):
        """
        __skimDeadList__
        return the list of jobs really failed: K, A
        """
        # Query the task DB for the requested jobs and keep only those
        # whose running status is 'K' (killed) or 'A' (aborted).
        skimmedlist = []
        self.up_task = common._db.getTask( self.nj_list )
        for job in self.up_task.jobs:
            if job.runningJob['status'] in ['K','A']:
                skimmedlist.append(job['jobId'])
        return skimmedlist

    def retrieveFile(self, sbi, jobid, destlog):
        """
        __retrieveFile__

        retrieves logging.info file from the server storage area

        sbi     : SBinterface coupling the server SE and localhost.
        jobid   : id of the job whose logging info is wanted.
        destlog : local path to write the file to.

        Returns True on successful copy (even if the subsequent remote
        cleanup fails), False when the copy itself fails.
        """
        self.taskuuid = str(common._db.queryTask('name'))
        common.logger.debug( "Task name: " + self.taskuuid)

        # full remote dir
        remotedir = os.path.join(self.storage_path, self.taskuuid)
        remotelog = remotedir + '/loggingInfo_'+str(jobid)+'.log'

        common.logger.info("Starting retrieving logging-info from server " \
                           + str(self.storage_name) + " for job " \
                           + str(jobid) + "...")

        # retrieve logging info from storage
        common.logger.debug( "retrieving "+ str(remotelog) +" to "+ str(destlog) )
        try:
            sbi.copy( remotelog, destlog)
        except Exception, ex:
            msg = "WARNING: Unable to retrieve logging-info file %s \n"%remotelog
            msg += str(ex)
            common.logger.debug(msg)
            return False
        # cleaning remote logging info file; failures here are only
        # logged -- the file was already fetched successfully.
        try:
            common.logger.debug( "Cleaning remote file [%s] " %( str(remotelog) ) )
            sbi.delete(remotelog)
        except Exception, ex:
            msg = "WARNING: Unable to clean remote logging-info file %s \n"%remotelog
            msg += str(ex)
            common.logger.debug(msg)
        return True