
Comparing COMP/CRAB/python/PostMortemServer.py (file contents):
Revision 1.8 by spiga, Tue May 26 10:23:01 2009 UTC vs.
Revision 1.15 by ewv, Tue Jul 27 15:56:13 2010 UTC

# Line 9 | Line 9 | from ProdCommon.Storage.SEAPI.SBinterfac
9  
10   class PostMortemServer(PostMortem):
11      def __init__(self, cfg_params, nj_list):
12 <            
12 >
13          PostMortem.__init__(self, cfg_params, nj_list)
14  
15          # init client server params...
16 <        CliServerParams(self)        
16 >        CliServerParams(self)
17 >
18 >        self.copyTout= setLcgTimeout()
19 >        if common.scheduler.name().upper() in ['LSF', 'CAF']:
20 >            self.copyTout= ' '
21  
22          if self.storage_path[0]!='/':
23              self.storage_path = '/'+self.storage_path
24 <        
24 >
25          return
26 <    
26 >
27      def collectLogging(self):
28 <        # get updated status from server
28 >        # get updated status from server
29          try:
30              from StatusServer import StatusServer
31              stat = StatusServer(self.cfg_params)
32 <            stat.resynchClientSide()
32 >            warning_msg = stat.resynchClientSide()
33 >            if warning_msg is not None:
34 >                common.logger.info(warning_msg)
35          except:
36              pass
37  
38          #create once storage interaction object
39          seEl = None
40          loc = None
41 <        try:  
41 >        try:
42              seEl = SElement(self.storage_name, self.storage_proto, self.storage_port)
43          except Exception, ex:
44              common.logger.debug( str(ex))
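
A note on the hunk above: the main behavioural addition in __init__ is the copy timeout. Revision 1.15 takes the option string from setLcgTimeout() and replaces it with a single blank when the scheduler is LSF or CAF; the value is later handed to sbi.copy(..., opt=...). A minimal sketch of that selection logic (pick_copy_timeout and its arguments are illustrative names, not part of CRAB):

def pick_copy_timeout(scheduler_name, set_lcg_timeout):
    """Return the option string later passed as sbi.copy(..., opt=...)."""
    # set_lcg_timeout stands in for CRAB's setLcgTimeout() helper, which is
    # assumed to return the extra copy-timeout options as a string.
    copy_tout = set_lcg_timeout()
    # For the LSF/CAF schedulers the diff replaces the options with a blank.
    if scheduler_name.upper() in ['LSF', 'CAF']:
        copy_tout = ' '
    return copy_tout
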
# Line 46 | Line 52 | class PostMortemServer(PostMortem):
52              raise CrabException(msg)
53  
54          ## coupling se interfaces
55 <        sbi = SBinterface( seEl, loc )
55 >        sbi = SBinterface(seEl, loc, logger = common.logger.logger)
56  
57          ## get the list of jobs to get logging.info skimmed by failed status
58          logginable = self.skimDeadList()
59  
60 <        ## iter over each asked job and print warning if not in skimmed list
61 <        for id in self.nj_list:
62 <            if id not in self.all_jobs:
63 <                common.logger.info('Warning: job # ' + str(id) + ' does not exist! Not possible to ask for postMortem ')
64 <                continue
65 <            elif id in logginable:  
66 <                fname = self.fname_base + str(id) + '.LoggingInfo'
67 <                if os.path.exists(fname):
68 <                    common.logger.info('Logging info for job ' + str(id) + ' already present in '+fname+'\nRemove it for update')
60 >        if self.storage_proto in ['globus']:
61 >            for id in self.nj_list:
62 >                if id not in self.all_jobs:
63 >                    common.logger.info('Warning: job # ' + str(id) + ' does not exist! Not possible to ask for postMortem ')
64 >                elif id not in logginable:
65 >                    common.logger.info('Warning: job # ' + str(id) + ' not killed or aborted! Will get loggingInfo manually ')
66 >                    PostMortem.collectOneLogging(self,id)
67 >            # construct a list of absolute paths of input files
68 >            # and the destinations to copy them to
69 >            sourcesList = []
70 >            destsList = []
71 >            self.taskuuid = str(common._db.queryTask('name'))
72 >            common.logger.debug( "Starting globus retrieval for task name: " + self.taskuuid)
73 >            remotedir = os.path.join(self.storage_path, self.taskuuid)
74 >            for i in logginable:
75 >                remotelog = remotedir + '/loggingInfo_'+str(i)+'.log'
76 >                sourcesList.append(remotelog)
77 >                fname = self.fname_base + str(i) + '.LoggingInfo'
78 >                destsList.append(fname)
79 >
80 >            # try to do the copy
81 >            copy_res = None
82 >            try:
83 >                copy_res = sbi.copy( sourcesList, destsList, opt=self.copyTout)
84 >            except Exception, ex:
85 >                msg = "WARNING: Unable to retrieve logging info files \n"
86 >                msg += str(ex)
87 >                common.logger.debug(msg)
88 >                import traceback
89 >                common.logger.debug( str(traceback.format_exc()) )
90 >            if copy_res is not None:
91 >                ## evaluating copy results
92 >                copy_err_list = []
93 >                count = 0
94 >                for ll in map(None, copy_res, sourcesList):
95 >                    exitcode = int(ll[0][0])
96 >                    if exitcode == 0:
97 >                        ## decode logging info
98 >                        fl = open(destsList[count], 'r')
99 >                        out = "".join(fl.readlines())
100 >                        fl.close()
101 >                        reason = self.decodeLogging(out)
102 >                        common.logger.info('Logging info for job '+ str(logginable[count]) +': '+str(reason)+'\n      written to '+str(destsList[count])+' \n' )
103 >                    else:
104 >                        common.logger.info('Logging info for job '+ str(logginable[count]) +' not retrieved. Trying to get loggingInfo manually')
105 >                        PostMortem.collectOneLogging(self,logginable[count])
106 >                    count += 1
107 >        else:
108 >            ## iter over each asked job and print warning if not in skimmed list
109 >            for id in self.nj_list:
110 >                if id not in self.all_jobs:
111 >                    common.logger.info('Warning: job # ' + str(id) + ' does not exist! Not possible to ask for postMortem ')
112                      continue
113 <                ## retrieving & processing logging info
114 <                if self.retrieveFile( sbi, id, fname):
115 <                    ## decode logging info
116 <                    fl = open(fname, 'r')
117 <                    out = "".join(fl.readlines())  
118 <                    fl.close()
119 <                    reason = self.decodeLogging(out)
120 <                    common.logger.info('Logging info for job '+ str(id) +': '+str(reason)+'\n      written to '+str(fname)+' \n' )
113 >                elif id in logginable:
114 >                    fname = self.fname_base + str(id) + '.LoggingInfo'
115 >                    if os.path.exists(fname):
116 >                        common.logger.info('Logging info for job ' + str(id) + ' already present in '+fname+'\nRemove it for update')
117 >                        continue
118 >                    ## retrieving & processing logging info
119 >                    if self.retrieveFile( sbi, id, fname):
120 >                        ## decode logging info
121 >                        fl = open(fname, 'r')
122 >                        out = "".join(fl.readlines())
123 >                        fl.close()
124 >                        reason = self.decodeLogging(out)
125 >                        common.logger.info('Logging info for job '+ str(id) +': '+str(reason)+'\n      written to '+str(fname)+' \n' )
126 >                    else:
127 >                        common.logger.info('Logging info for job '+ str(id) +' not retrieved. Trying to get loggingInfo manually')
128 >                        PostMortem.collectOneLogging(self,id)
129                  else:
130 <                    common.logger.info('Logging info for job '+ str(id) +' not retrieved')
131 <            else:
75 <                common.logger.info('Warning: job # ' + str(id) + ' not killed or aborted! Will get loggingInfo manually ')
76 <                PostMortem.collectLogging(self)
130 >                    common.logger.info('Warning: job # ' + str(id) + ' not killed or aborted! Will get loggingInfo manually ')
131 >                    PostMortem.collectOneLogging(self,id)
132          return
133  
134  
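
For the globus protocol the hunk above builds the parallel sourcesList/destsList arrays, issues a single bulk sbi.copy(), and then walks the per-file results in step with the sources; map(None, copy_res, sourcesList) is the Python 2 spelling of zip with padding. A minimal sketch of that evaluation step, assuming (as the diff does) that each entry of copy_res carries its exit code in position 0; the callback names are placeholders, not CRAB methods:

def evaluate_copy_results(copy_res, dests, decode_one, fallback_one):
    """Dispatch each bulk-copy result to decoding or to the manual fallback."""
    for result, dest in zip(copy_res, dests):
        if int(result[0]) == 0:
            decode_one(dest)      # file landed: read it and decode the logging info
        else:
            fallback_one(dest)    # copy failed: fall back to collectOneLogging
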
# Line 88 | Line 143 | class PostMortemServer(PostMortem):
143              if job.runningJob['status'] in ['K','A']:
144                  skimmedlist.append(job['jobId'])
145          return skimmedlist
146 <        
146 >
147      def retrieveFile(self, sbi, jobid, destlog):
148          """
149          __retrieveFile__
# Line 98 | Line 153 | class PostMortemServer(PostMortem):
153          self.taskuuid = str(common._db.queryTask('name'))
154          common.logger.debug( "Task name: " + self.taskuuid)
155  
156 <        # full remote dir
156 >        # full remote dir
157          remotedir = os.path.join(self.storage_path, self.taskuuid)
158          remotelog = remotedir + '/loggingInfo_'+str(jobid)+'.log'
159  
# Line 115 | Line 170 | class PostMortemServer(PostMortem):
170              msg += str(ex)
171              common.logger.debug(msg)
172              return False
173 <        # cleaning remote logging info file
173 >        # cleaning remote logging info file
174          try:
175 <            common.logger.debug( "Cleaning remote file [%s] " + str(remotelog) )
175 >            common.logger.debug( "Cleaning remote file [%s] " %( str(remotelog) ) )
176              sbi.delete(remotelog)
177          except Exception, ex:
178              msg = "WARNING: Unable to clean remote logging-info file %s \n"%remotelog

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines