ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/StatusServer.py
Revision: 1.23
Committed: Fri Jan 4 17:30:56 2008 UTC (17 years, 4 months ago) by slacapra
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_1_2, CRAB_2_1_2_pre2, CRAB_2_1_2_pre1, CRAB_2_1_1, CRAB_2_1_1_pre3, CRAB_2_2_0_pre1, CRAB_2_1_1_pre1, CRAB_2_1_0, CRAB_2_1_0_pre6, CRAB_2_1_0_pre5, CRAB_2_1_0_pre4, CRAB_2_1_0_pre3, CRAB_2_1_0_pre2, CRAB_2_1_0_pre1
Branch point for: CRAB_2_1_2_br, CRAB_2_1_1_pre2
Changes since 1.22: +0 -3 lines
Log Message:
Add support for LSF/CAF direct submission
Re-establish a correct inheritance pattern for Scheduler* classes
Start to remove some unneeded try:/except: statements and replace them with appropriate if:/then: checks
Erase any use of cfg_param as a common block (especially in conjunction with apmon) and replace it with the use of task DB
Several minor cleanups

File Contents

# User Rev Content
1 spiga 1.1 from Actor import *
2     from crab_util import *
3     import common
4     from ApmonIf import ApmonIf
5     import time
6    
7     import xml.dom.minidom
8     import TaskDB
9    
class StatusServer(Actor):
    """
    Actor that reports the status of a task handled by the CRAB server.

    run() copies ``xmlReportFile.xml`` from the server storage area with
    ``lcg-cp``, parses it with ``xml.dom.minidom``, updates ``common.jobDB``
    from it and prints a per-job table followed by a per-status summary
    (see PrintReport_).

    NOTE(review): the nesting of some statements in run() was reconstructed
    from a listing without indentation; the spots where more than one
    reading was possible are marked with NOTE(review) below.
    """

    # Status string found in the report -> one-letter code stored in the
    # job DB.  Statuses missing from this table leave the job DB untouched.
    _STATE_CONVERTING = {
        'Running': 'R',
        'Aborted': 'A',
        'Done': 'D',
        'Done (Failed)': 'D',
        'Cleared': 'D',
        'Cancelled': 'K',
        'Killed': 'K',
        'NotSubmitted': 'C',
        'Retrieving by the server': 'R',
    }

    # Displayed status -> name of the counter attribute it increments.
    # NOTE(review): 'Cancelled' feeds countCancel, which PrintReport_ neither
    # prints nor subtracts from the total, so cancelled jobs end up in the
    # "Waiting or Under Server Management" line -- confirm this is intended.
    _COUNTER_BY_STATUS = {
        'Running': 'countRun',
        'Aborted': 'countAbort',
        'Done': 'countDone',
        'Done (Failed)': 'countDone',
        'Cancelled': 'countCancel',
        'Submitted': 'countSubmit',
        'Submitting': 'countSubmitting',
        'Ready': 'countReady',
        'Scheduled': 'countSched',
        'Cleared': 'countCleared',
        'NotSubmitted': 'countNotSubmit',
        'Waiting': 'countWait',
        'Retrieving by the server': 'countRet',
        'Killed': 'countKilled',
    }

    # Statuses whose table row also shows the two exit-code columns.
    _STATUSES_WITH_EXIT_CODES = ("Done", "Cleared", "Aborted", "Done (Failed)")

    _ROW_FORMAT = "%-10s %-24s %-20s %-20s %-18s %-20s"

    # Summary lines in print order: (counter attribute, format, extra hint).
    # Exactly these counters are subtracted from the total in PrintReport_.
    _REPORT_LINES = (
        ('countSubmitting', ">>>>>>>>> %i Jobs Submitting by the server", None),
        ('countNotSubmit', ">>>>>>>>> %i Jobs Not Submitted to the grid", None),
        ('countSubmit', ">>>>>>>>> %i Jobs Submitted", None),
        ('countWait', ">>>>>>>>> %i Jobs Waiting", None),
        ('countReady', ">>>>>>>>> %i Jobs Ready", None),
        ('countSched', ">>>>>>>>> %i Jobs Scheduled", None),
        ('countRun', ">>>>>>>>> %i Jobs Running", None),
        ('countRet', ">>>>>>>>> %i Jobs Retrieving by the server", None),
        ('countDone', ">>>>>>>>> %i Jobs Done",
         " Retrieve them with: crab -getoutput -continue"),
        ('countKilled', ">>>>>>>>> %i Jobs Killed", None),
        ('countAbort', ">>>>>>>>> %i Jobs Aborted", None),
        ('countCleared', ">>>>>>>>> %i Jobs Cleared", None),
    )

    def __init__(self, cfg_params,):
        """
        Store the configuration, reset every per-status counter and read the
        server location from ``cfg_params['CRAB.server_name']``.

        Raises CrabException when 'CRAB.server_name' is not configured.
        """
        self.cfg_params = cfg_params

        self.countNotSubmit = 0
        self.countSubmit = 0
        self.countSubmitting = 0
        self.countWait = 0
        self.countDone = 0
        self.countReady = 0
        self.countSched = 0
        self.countRun = 0
        self.countAbort = 0
        self.countCancel = 0
        self.countRet = 0
        self.countKilled = 0
        self.countCleared = 0
        self.countToTjob = 0

        try:
            # Example from the original code: gsiftp://pcpg01.cern.ch/data/SEDir/
            # NOTE(review): run() prepends 'gsiftp://' itself, so the scheme
            # in that example looks stale -- confirm the expected format.
            server_name = self.cfg_params['CRAB.server_name']
        except KeyError:
            msg = 'No server selected ...'
            msg = msg + 'Please specify a server in the crab cfg file'
            raise CrabException(msg)

        # The project directory name is appended directly to this value.
        if not server_name.endswith("/"):
            server_name = server_name + "/"
        self.server_name = server_name

        return

    def translateStatus(self, status):
        """
        Return the one-letter job DB code for a status string of the report,
        or None when the status has no job DB equivalent.

        The original author notes this table is meant to be symmetric to the
        one used by the server.
        """
        return self._STATE_CONVERTING.get(status)

    def run(self):
        """
        The main method of the class: check the status of the task.

        Returns early, after a hint on how to submit, when every job in the
        job DB is still in status 'C'.  Otherwise fetches the report, updates
        and saves the job DB and prints the job table and the summary.

        Raises CrabException when the report cannot be fetched or parsed.
        """
        common.logger.debug(5, "status server::run() called")

        if not self._anyJobSubmitted():
            common.logger.message("Your jobs are not yet submitted!")
            common.logger.message("Before checking the status submit your jobs with the command: crab -submit all -c\n")
            return

        common.scheduler.checkProxy()

        doc = self._fetchReport()

        ### <Job status='Submitted' job_exit='NULL' id='1' exe_exit='NULL'/>

        # Child 1 of the root element carries the task-level attributes and
        # job entries start at child 3.
        nodes = doc.childNodes[0].childNodes
        self.countToTjob = int(nodes[1].getAttribute("totJob"))

        common.jobDB.load()

        firstEntry = nodes[3]
        if firstEntry.getAttribute("id") == "all":
            # A single entry describes every job of the task.
            self._applyTaskWideStatus(firstEntry.getAttribute("status"))
        else:
            print(self._ROW_FORMAT % ('JOBID', 'STATUS', 'SITE', 'JOB_EXIT_STATUS', 'EXE_EXIT_CODE', 'RESUBMIT'))
            print('---------------------------------------------------------------------------------------------------------')

            # Job entries sit at indexes 3, 5, 7, ... (the original advanced
            # an extra offset by one per job) -- presumably because
            # whitespace text nodes separate the elements; TODO confirm
            # against the code that writes the report.
            for job in range(self.countToTjob):
                self._reportJob(nodes[3 + 2 * job])

        # NOTE(review): saving after both branches is the reading that lets
        # the task-wide status updates persist; confirm against the
        # original indentation.
        common.jobDB.save()

        self.PrintReport_()

    def _anyJobSubmitted(self):
        """Return True if at least one job in the job DB is not in status 'C'."""
        for nj in range(common.jobDB.nJobs()):
            # 'C' is the code this class stores for 'NotSubmitted'.
            if common.jobDB.status(nj) != 'C':
                return True
        return False

    def _fetchReport(self):
        """
        Copy the report file from the server with lcg-cp and return it parsed.

        Raises CrabException, with the "report will be ready soon" message,
        when building/running the copy command or parsing the file fails.
        """
        common.taskDB.load()
        WorkDirName = os.path.basename(os.path.split(common.work_space.topDir())[0])
        projectUniqName = 'crab_' + str(WorkDirName) + '_' + common.taskDB.dict('TasKUUID')

        try:
            common.logger.message("Checking the status...\n")
            localReport = common.work_space.resDir() + 'xmlReportFile.xml'
            cmd = 'lcg-cp --vo cms gsiftp://' + str(self.server_name) + str(projectUniqName) + \
                  '/res/xmlReportFile.xml file://' + localReport
            common.logger.debug(6, cmd)
            # POSIX spelling of the original '>& /dev/null', which is a
            # csh/bash extension that a strict /bin/sh rejects.
            # The exit status is deliberately not checked (as in the
            # original): a failed copy normally surfaces as a parse error
            # below.  NOTE(review): a stale local file from an earlier run
            # would be parsed instead -- consider checking the status.
            os.system(cmd + ' > /dev/null 2>&1')
            return xml.dom.minidom.parse(localReport)
        except Exception:
            msg = "The server is managing your task."
            msg += "\n A detailed report will be ready soon.\n"
            raise CrabException(msg)

    def _applyTaskWideStatus(self, status):
        """Apply a status reported for id 'all' to every job in the job DB."""
        nJobs = common.jobDB.nJobs()

        code = None
        if status == "Submitted":
            self.countSubmitting = nJobs
            code = 'S'
        elif status == "Killed":
            self.countKilled = nJobs
            code = 'K'
        elif status == "NotSubmitted":
            self.countNotSubmit = nJobs
            code = 'C'

        if code is not None:
            for nj in range(nJobs):
                common.jobDB.setStatus(nj, code)

        # NOTE(review): applied to all three cases (and to unknown statuses);
        # the flattened original could also be read as applying to
        # 'NotSubmitted' only -- confirm.
        self.countToTjob = nJobs

    def _readSiteAndResubmit(self, node, stato):
        """
        Return (site, resubmit) for a job entry, with placeholder values
        ("NULL", "None", and "0" for resubmit) blanked out.

        Both values default to '' when reading fails, so the caller can
        always print the row.
        """
        try:
            site = node.getAttribute("site")
            resub = node.getAttribute("resubmit")
        except Exception:
            # sys.exc_info() rather than binding the exception in the except
            # clause: this form parses on both Python 2 and Python 3.
            import sys
            common.logger.message("Problem reading report file!")
            common.logger.debug(1, str(sys.exc_info()[1]))
            return '', ''

        if site in ("NULL", "None"):
            site = ''
        # The resubmission count is never shown for killed jobs.
        if stato == "Killed" or resub in ("NULL", "None", "0"):
            resub = ''
        return site, resub

    def _reportJob(self, node):
        """Update the job DB from one job entry, print its row and count it."""
        idJob = node.getAttribute("id")
        stato = node.getAttribute("status")
        job_exit = node.getAttribute("job_exit")
        exe_exit = node.getAttribute("exe_exit")
        cleared = node.getAttribute("cleared")
        site, resub = self._readSiteAndResubmit(node, stato)

        jobDbStatus = self.translateStatus(stato)
        if jobDbStatus is not None:
            common.logger.debug(5, '*** Updating jobdb for job %s ***' % idJob)
            # Report ids are 1-based, job DB keys are 0-based strings.
            dbIndex = str(int(idJob) - 1)
            if common.jobDB.status(dbIndex) != "Y":
                if jobDbStatus == 'D' and int(cleared) != 1:
                    ## 'Done' but not yet cleared (server side): shown, and
                    ## stored, as still being retrieved by the server.
                    stato = 'Retrieving by the server'
                    jobDbStatus = self.translateStatus(stato)
                common.jobDB.setStatus(dbIndex, jobDbStatus)
            else:
                # Local status 'Y' is left untouched and displayed as Cleared.
                stato = "Cleared"
            # NOTE(review): the original passed the 'exe_exit' attribute here
            # (through a variable named job_exit_status); kept as-is --
            # confirm which exit code the job DB expects.  Its nesting
            # (after the if/else above) is also a reconstruction.
            common.jobDB.setExitStatus(dbIndex, exe_exit)

        if stato in self._STATUSES_WITH_EXIT_CODES:
            print(self._ROW_FORMAT % (idJob, stato, site, job_exit, exe_exit, resub))
        else:
            if stato == "Resubmitting by the server":
                site = ""
            print(self._ROW_FORMAT % (idJob, stato, site, '', '', resub))

        counter = self._COUNTER_BY_STATUS.get(stato)
        if counter is not None:
            setattr(self, counter, getattr(self, counter) + 1)

    def PrintReport_(self):
        """
        Report #jobs for each status.

        Prints the total, one line per non-zero counter and, when the
        reported counters do not add up to the total, the remainder as
        "Waiting or Under Server Management".
        """
        print('')
        print(">>>>>>>>> %i Total Jobs " % (self.countToTjob))
        print('')

        accounted = 0
        for counter, line, hint in self._REPORT_LINES:
            count = getattr(self, counter)
            accounted += count
            if count != 0:
                print(line % (count))
                if hint is not None:
                    print(hint)

        countUnderMngmt = self.countToTjob - accounted
        if countUnderMngmt != 0:
            print(">>>>>>>>> %i Jobs Waiting or Under Server Management" % (countUnderMngmt))

        print('')
250 spiga 1.1