ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/PostMortemServer.py
(Generate patch)

Comparing COMP/CRAB/python/PostMortemServer.py (file contents):
Revision 1.4 by slacapra, Fri Jan 4 17:30:56 2008 UTC vs.
Revision 1.5 by mcinquil, Fri Jun 6 13:04:15 2008 UTC

# Line 1 | Line 1
1 < from Actor import *
1 > from PostMortem import PostMortem
2 > from StatusServer import StatusServer
3 >
4   from crab_util import *
5   import common
6 < from ApmonIf import ApmonIf
7 < import time
8 < from ProgressBar import ProgressBar
9 < from TerminalController import TerminalController
10 <
11 < class PostMortemServer(Actor):
12 <
13 <    def __init__(self, cfg_params,):
14 <        self.cfg_params = cfg_params
15 <        try:  
16 <            self.server_name = self.cfg_params['CRAB.server_name'] # gsiftp://pcpg01.cern.ch/data/SEDir/
17 <            if not self.server_name.endswith("/"):
18 <                self.server_name = self.server_name + "/"
19 <        except KeyError:
20 <            msg = 'No server selected ...'
21 <            msg = msg + 'Please specify a server in the crab cfg file'
20 <            raise CrabException(msg)
6 > import string, os
7 >
8 > from ProdCommon.Storage.SEAPI.SElement import SElement
9 > from ProdCommon.Storage.SEAPI.SBinterface import SBinterface
10 >
11 > class PostMortemServer(PostMortem, StatusServer):
12 >    def __init__(self, cfg_params, nj_list):
13 >            
14 >        PostMortem.__init__(self, cfg_params, nj_list)
15 >
16 >        # init client server params...
17 >        CliServerParams(self)        
18 >
19 >        if self.storage_path[0]!='/':
20 >            self.storage_path = '/'+self.storage_path
21 >        
22          return
23      
24 <    def run(self):
25 <        """
26 <        The main method of the class: retrieve the post mortem output from server
27 <        """
28 <        common.logger.debug(5, "PostMortem server::run() called")
24 >    def collectLogging(self):
25 >        # get updated status from server #inherited from StatusServer
26 >        self.resynchClientSide()
27 >
28 >        #create once storage interaction object
29 >        seEl = None
30 >        loc = None
31 >        try:  
32 >            seEl = SElement(self.storage_name, self.storage_proto, self.storage_port)
33 >        except Exception, ex:
34 >            common.logger.debug(1, str(ex))
35 >            msg = "ERROR: Unable to create SE source interface \n"
36 >            raise CrabException(msg)
37 >        try:
38 >            loc = SElement("localhost", "local")
39 >        except Exception, ex:
40 >            common.logger.debug(1, str(ex))
41 >            msg = "ERROR: Unable to create destination interface \n"
42 >            raise CrabException(msg)
43  
44 <        start = time.time()
44 >        ## coupling se interfaces
45 >        sbi = SBinterface( seEl, loc )
46  
47 <        common.scheduler.checkProxy()
47 >        ## get the list of jobs to get logging.info skimmed by failed status
48 >        logginable = self.skimDeadList()
49  
50 <        common.taskDB.load()
51 <        WorkDirName =os.path.basename(os.path.split(common.work_space.topDir())[0])
52 <        projectUniqName = 'crab_'+str(WorkDirName)+'_'+common.taskDB.dict("TasKUUID")    
53 <        #Need to add a check on the treashold level
54 <        # and on the task readness  TODO  
55 <        try:
56 <            ### retrieving poject from the server
57 <            common.logger.message("Retrieving the poject from the server...\n")
50 >        ## iter over each asked job and print warning if not in skimmed list
51 >        for id in self.nj_list:
52 >            if id not in self.all_jobs:
53 >                common.logger.message('Warning: job # ' + str(id) + ' does not exist! Not possible to ask for postMortem ')
54 >                continue
55 >            elif id in logginable:  
56 >                fname = self.fname_base + str(id) + '.LoggingInfo'
57 >                if os.path.exists(fname):
58 >                    common.logger.message('Logging info for job ' + str(id) + ' already present in '+fname+'\nRemove it for update')
59 >                    continue
60 >                ## retrieving & processing logging info
61 >                if self.retrieveFile( sbi, id, fname):
62 >                    ## decode logging info
63 >                    fl = open(fname, 'r')
64 >                    out = "".join(fl.readlines())  
65 >                    fl.close()
66 >                    reason = self.decodeLogging(out)
67 >                    common.logger.message('Logging info for job '+ str(id) +': '+str(reason)+'\n      written to '+str(fname)+' \n' )
68 >                else:
69 >                    common.logger.message('Logging info for job '+ str(id) +' not retrieved')
70 >            else:
71 >                common.logger.message('Warning: job # ' + str(id) + ' not killed or aborted! Not possible to ask for postMortem ')
72 >        return
73  
42 -          copyHere = common.work_space.jobDir() # MATT
43 -
44 -          cmd = 'lcg-cp --vo cms --verbose gsiftp://' + str(self.server_name) + str(projectUniqName)+'/res/failed.tgz file://'+copyHere+'failed.tgz'# MATT
45 -          common.logger.debug(5, cmd)
46 -          copyOut = os.system(cmd +' >& /dev/null')
47 -      except:
48 -          msg = ("postMortem output not yet available")
49 -          raise CrabException(msg)
74  
75 <        zipOut = "failed.tgz"
76 <        if os.path.exists( copyHere + zipOut ): # MATT
77 <            cwd = os.getcwd()
78 <            os.chdir( copyHere )# MATT
79 <            common.logger.debug( 5, 'tar -zxvf ' + zipOut )
80 <            cmd = 'tar -zxvf ' + zipOut
81 <            cmd += '; mv .tmpFailed/* .; rm -drf .tmpDone/'
82 <            cmd_out = runCommand(cmd)
83 <            os.chdir(cwd)
84 <            common.logger.debug( 5, 'rm -f '+copyHere+zipOut )# MATT
85 <            cmd = 'rm -f '+copyHere+zipOut# MATT
86 <            cmd_out = runCommand(cmd)
87 <
88 <            msg='Logging info for project '+str(WorkDirName)+': \n'      
89 <            msg+='written to '+copyHere+' \n'      # MATT
90 <            common.logger.message(msg)
91 <        else:
92 <            common.logger.message("Logging info is not yet ready....\n")
75 >    def skimDeadList(self):
76 >        """
77 >        __skimDeadList__
78 >        return the list of jobs really failed: K, A
79 >        """
80 >        skimmedlist = []
81 >        self.up_task = common._db.getTask( self.nj_list )
82 >        for job in self.up_task.jobs:
83 >            if job.runningJob['status'] in ['K','A']:
84 >                skimmedlist.append(job['jobId'])
85 >        return skimmedlist
86 >        
87 >    def retrieveFile(self, sbi, jobid, destlog):
88 >        """
89 >        __retrieveFile__
90 >
91 >        retrieves logging.info file from the server storage area
92 >        """
93 >        self.taskuuid = str(common._db.queryTask('name'))
94 >        common.logger.debug(3, "Task name: " + self.taskuuid)
95  
96 <        return
96 >        # full remote dir
97 >        remotedir = os.path.join(self.storage_path, self.taskuuid)
98 >        remotelog = remotedir + '/loggingInfo_'+str(jobid)+'.log'
99 >
100 >        common.logger.message("Starting retrieving logging-info from server " \
101 >                               + str(self.storage_name) + " for job " \
102 >                               + str(jobid) + "...")
103  
104 +        # retrieve logging info from storage
105 +        common.logger.debug(1, "retrieving "+ str(remotelog) +" to "+ str(destlog) )
106 +        try:
107 +            sbi.copy( remotelog, destlog)
108 +        except Exception, ex:
109 +            msg = "WARNING: Unable to retrieve logging-info file %s \n"%remotelog
110 +            msg += str(ex)
111 +            common.logger.debug(1,msg)
112 +            return False
113 +        # cleaning remote logging info file
114 +        try:
115 +            common.logger.debug(5, "Cleaning remote file [%s] " + str(remotelog) )
116 +            sbi.delete(remotelog)
117 +        except Exception, ex:
118 +            msg = "WARNING: Unable to clean remote logging-info file %s \n"%remotelog
119 +            msg += str(ex)
120 +            common.logger.debug(5,msg)
121 +        return True

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines