ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/PostMortemServer.py
Revision: 1.13
Committed: Wed Apr 28 15:15:29 2010 UTC (15 years ago) by mcinquil
Content type: text/x-python
Branch: MAIN
Changes since 1.12: +3 -1 lines
Log Message:
warning message on c-s communication at the end

File Contents

# User Rev Content
1 mcinquil 1.5 from PostMortem import PostMortem
2    
3 spiga 1.1 from crab_util import *
4     import common
5 mcinquil 1.5 import string, os
6    
7     from ProdCommon.Storage.SEAPI.SElement import SElement
8     from ProdCommon.Storage.SEAPI.SBinterface import SBinterface
9    
class PostMortemServer(PostMortem):
    """
    __PostMortemServer__

    PostMortem variant that first tries to fetch the logging-info files
    from the server storage area and falls back to
    PostMortem.collectOneLogging() for every job whose file cannot be
    retrieved that way.

    NOTE(review): storage_name, storage_proto, storage_port, storage_path,
    all_jobs, fname_base and decodeLogging() are not defined in this class;
    presumably they are provided by CliServerParams() and by the PostMortem
    base class -- confirm against those modules.
    """

    def __init__(self, cfg_params, nj_list):
        """
        __init__

        cfg_params and nj_list are forwarded unchanged to PostMortem.__init__.
        Sets self.copyTout (option string later passed to the bulk copy) and
        makes sure self.storage_path starts with '/'.
        """
        PostMortem.__init__(self, cfg_params, nj_list)

        # init client server params...
        CliServerParams(self)

        self.copyTout = setLcgTimeout()
        if common.scheduler.name().upper() in ['LSF', 'CAF']:
            # the timeout option is replaced by a blank for these schedulers
            self.copyTout = ' '

        # startswith() instead of [0]: an empty path must not raise IndexError
        if not self.storage_path.startswith('/'):
            self.storage_path = '/' + self.storage_path

        return

    def collectLogging(self):
        """
        __collectLogging__

        Resynchronizes the job status with the server (best effort), builds
        the storage interface and collects the logging info of the jobs in
        self.nj_list: with a single bulk copy when the storage protocol is
        'globus', job by job otherwise.

        Raises CrabException when a storage element interface cannot be
        created.
        """
        import traceback

        # get updated status from server; a failure here must not prevent
        # the retrieval, but it is logged instead of being silently dropped
        try:
            from StatusServer import StatusServer
            stat = StatusServer(self.cfg_params)
            warning_msg = stat.resynchClientSide()
            if warning_msg is not None:
                common.logger.info(warning_msg)
        except Exception:
            common.logger.debug("Unable to resynchronize status from server:\n"
                                + str(traceback.format_exc()))

        # create once storage interaction object
        sbi = self._createStorageInterface()

        ## get the list of jobs to get logging.info skimmed by failed status
        logginable = self.skimDeadList()

        if self.storage_proto in ['globus']:
            self._collectBulk(sbi, logginable)
        else:
            self._collectOneByOne(sbi, logginable)
        return

    def _createStorageInterface(self):
        """
        Return the SBinterface coupling the server storage element (source)
        with the local one (destination); raises CrabException on failure.
        """
        # sys.exc_info() is used instead of the Python-2-only
        # "except Exception, ex" form so this file also parses on Python 3
        import sys

        try:
            seEl = SElement(self.storage_name, self.storage_proto, self.storage_port)
        except Exception:
            common.logger.debug(str(sys.exc_info()[1]))
            msg = "ERROR: Unable to create SE source interface \n"
            raise CrabException(msg)
        try:
            loc = SElement("localhost", "local")
        except Exception:
            common.logger.debug(str(sys.exc_info()[1]))
            msg = "ERROR: Unable to create destination interface \n"
            raise CrabException(msg)

        ## coupling se interfaces
        return SBinterface(seEl, loc)

    def _remoteLogPath(self, jobid):
        """
        Return the path of the logging-info file of jobid inside the task
        directory of the server storage area (needs self.taskuuid set).
        """
        remotedir = os.path.join(self.storage_path, self.taskuuid)
        return remotedir + '/loggingInfo_' + str(jobid) + '.log'

    def _reportLoggingFile(self, jobid, fname):
        """
        Decode the retrieved logging-info file fname and log the result.
        """
        fl = open(fname, 'r')
        try:
            out = fl.read()
        finally:
            # close the handle even when reading fails
            fl.close()
        reason = self.decodeLogging(out)
        common.logger.info('Logging info for job '+ str(jobid) +': '+str(reason)+'\n written to '+str(fname)+' \n' )

    def _collectManually(self, jobid):
        """
        Warn that the server file is not available and use the base class
        retrieval for jobid.
        """
        common.logger.info('Logging info for job '+ str(jobid) +' not retrieved. Tring to get loggingInfo manually')
        PostMortem.collectOneLogging(self, jobid)

    def _collectBulk(self, sbi, logginable):
        """
        Retrieve the logging info of all the jobs in logginable with one
        copy call, then decode every file that was copied successfully.
        Requested jobs that are not in logginable are handled first.
        """
        import sys
        import traceback

        for jobid in self.nj_list:
            if jobid not in self.all_jobs:
                common.logger.info('Warning: job # ' + str(jobid) + ' does not exist! Not possible to ask for postMortem ')
            elif jobid not in logginable:
                common.logger.info('Warning: job # ' + str(jobid) + ' not killed or aborted! Will get loggingInfo manually ')
                PostMortem.collectOneLogging(self, jobid)

        # construct a list of absolute paths of input files
        # and the destinations to copy them to
        self.taskuuid = str(common._db.queryTask('name'))
        common.logger.debug( "Starting globus retrieval for task name: " + self.taskuuid)
        sourcesList = []
        destsList = []
        for jobid in logginable:
            sourcesList.append(self._remoteLogPath(jobid))
            destsList.append(self.fname_base + str(jobid) + '.LoggingInfo')

        # try to do the copy
        copy_res = None
        try:
            copy_res = sbi.copy( sourcesList, destsList, opt=self.copyTout)
        except Exception:
            # report the requested sources: the previous message referenced
            # an undefined name and raised NameError inside this handler
            msg = "WARNING: Unable to retrieve logging info file %s \n" % ", ".join(sourcesList)
            msg += str(sys.exc_info()[1])
            common.logger.debug(msg)
            common.logger.debug( str(traceback.format_exc()) )

        if copy_res is None:
            return

        ## evaluating copy results
        # assumes the results come back in the same order as sourcesList and
        # that the first item of each result is the exit code -- this is what
        # the previous implementation relied on as well
        results = list(copy_res)
        for count, jobid in enumerate(logginable):
            # a job without a result is treated like a failed copy
            copied = count < len(results) and int(results[count][0]) == 0
            if copied:
                ## decode logging info
                self._reportLoggingFile(jobid, destsList[count])
            else:
                self._collectManually(jobid)

    def _collectOneByOne(self, sbi, logginable):
        """
        Retrieve the logging info of each requested job with its own copy,
        skipping the jobs whose local file is already present.
        """
        ## iter over each asked job and print warning if not in skimmed list
        for jobid in self.nj_list:
            if jobid not in self.all_jobs:
                common.logger.info('Warning: job # ' + str(jobid) + ' does not exist! Not possible to ask for postMortem ')
                continue

            if jobid not in logginable:
                common.logger.info('Warning: job # ' + str(jobid) + ' not killed or aborted! Will get loggingInfo manually ')
                PostMortem.collectOneLogging(self, jobid)
                continue

            fname = self.fname_base + str(jobid) + '.LoggingInfo'
            if os.path.exists(fname):
                common.logger.info('Logging info for job ' + str(jobid) + ' already present in '+fname+'\nRemove it for update')
                continue

            ## retrieving & processing logging info
            if self.retrieveFile( sbi, jobid, fname):
                ## decode logging info
                self._reportLoggingFile(jobid, fname)
            else:
                self._collectManually(jobid)

    def skimDeadList(self):
        """
        __skimDeadList__
        return the list of jobs really failed: K, A

        Also stores the task returned by common._db.getTask() in self.up_task.
        """
        self.up_task = common._db.getTask( self.nj_list )
        return [job['jobId'] for job in self.up_task.jobs
                if job.runningJob['status'] in ['K', 'A']]

    def retrieveFile(self, sbi, jobid, destlog):
        """
        __retrieveFile__

        retrieves logging.info file from the server storage area

        sbi is the storage interface used for the copy, jobid the job whose
        file is wanted and destlog the local destination path.
        Returns True when the copy succeeded, False otherwise; a failure of
        the remote clean-up is only logged and still returns True.
        """
        import sys

        self.taskuuid = str(common._db.queryTask('name'))
        common.logger.debug( "Task name: " + self.taskuuid)

        # full remote path
        remotelog = self._remoteLogPath(jobid)

        common.logger.info("Starting retrieving logging-info from server " \
                           + str(self.storage_name) + " for job " \
                           + str(jobid) + "...")

        # retrieve logging info from storage
        common.logger.debug( "retrieving "+ str(remotelog) +" to "+ str(destlog) )
        try:
            sbi.copy( remotelog, destlog)
        except Exception:
            msg = "WARNING: Unable to retrieve logging-info file %s \n"%remotelog
            msg += str(sys.exc_info()[1])
            common.logger.debug(msg)
            return False

        # cleaning remote logging info file
        try:
            common.logger.debug( "Cleaning remote file [%s] " %( str(remotelog) ) )
            sbi.delete(remotelog)
        except Exception:
            msg = "WARNING: Unable to clean remote logging-info file %s \n"%remotelog
            msg += str(sys.exc_info()[1])
            common.logger.debug(msg)
        return True