1 |
slacapra |
1.1 |
from Actor import *
|
2 |
slacapra |
1.21 |
import common
|
3 |
spiga |
1.24 |
import string, os, time
|
4 |
spiga |
1.28 |
from crab_util import *
|
5 |
slacapra |
1.1 |
|
6 |
|
|
class Status(Actor):
    """
    Actor that queries the scheduler for the status of every job of the
    current task, displays one table row per job, forwards each job status
    to the monitoring sink (common.apmon) and prints a summary of the jobs
    grouped by wrapper exit code.
    """

    def __init__(self, *args):
        """
        Store the configuration and read the XML report setting.

        args[0] must be the configuration parameter mapping; only its
        "USER.xml_report" entry is read here (default: empty string).
        """
        self.cfg_params = args[0]

        self.xml = self.cfg_params.get("USER.xml_report", '')

        # Filled by compute(). Initialised here so that PrintReport_() does
        # not raise AttributeError when called before any status query.
        self.wrapErrorList = []

        return

    def run(self):
        """
        The main method of the class: compute the status and print a report.

        The elapsed wall-clock time is sent both to the debug log and to the
        log writer.
        """
        common.logger.debug(5, "Status::run() called")

        start = time.time()
        self.query()
        self.PrintReport_()
        stop = time.time()

        elapsed = "Status Time: " + str(stop - start)
        common.logger.debug(1, elapsed)
        common.logger.write(elapsed)

    def query(self):
        """
        Compute the status: fetch the task from the local DB, ask the
        scheduler for everything it knows about it and hand the result
        to compute().
        """
        common.logger.message("Checking the status of all jobs: please wait")
        task = common._db.getTask()
        upTask = common.scheduler.queryEverything(task['id'])
        self.compute(upTask)

    def compute(self, up_task):
        """
        Build the per-job status table for up_task and display it.

        For every job in up_task.jobs one formatted row (id, scheduler
        status, destination host, executable exit code, wrapper exit code)
        is produced, the wrapper exit code is recorded in
        self.wrapErrorList, and the job status is forwarded through
        dataToDash(). The rows are finally passed, together with a header
        line and self.xml, to displayReport() (a helper that is not defined
        in this class; it comes from one of the star imports).
        """
        row_format = "%-8s %-18s %-40s %-13s %-15s"

        toPrint = []
        # The task identifier is the task name without its last
        # '_'-separated field.
        taskId = "_".join(str(up_task['name']).split('_')[:-1])
        self.wrapErrorList = []

        for job in up_task.jobs:
            running = job.runningJob
            raw_status = running['statusScheduler']

            job_id = str(running['jobId'])
            jobStatus = str(raw_status)
            dest = str(running['destination']).split(':')[0]
            exe_exit_code = str(running['applicationReturnCode'])
            job_exit_code = str(running['wrapperReturnCode'])

            # Recorded before blanking: PrintReport_() queries the DB with
            # the stringified value, 'None' included.
            self.wrapErrorList.append(job_exit_code)

            # Missing values are shown as empty table cells.
            if dest == 'None':
                dest = ''
            if exe_exit_code == 'None':
                exe_exit_code = ''
            if job_exit_code == 'None':
                job_exit_code = ''

            toPrint.append(row_format % (job_id, jobStatus, dest,
                                         exe_exit_code, job_exit_code))

            # Test the raw value: jobStatus went through str() and can
            # therefore never be None, which made the former check
            # ("jobStatus is not None") always true.
            if raw_status is not None:
                self.dataToDash(job, job_id, taskId, dest, jobStatus)

        header = row_format % ('ID', 'STATUS', 'E_HOST',
                               'EXE_EXIT_CODE', 'JOB_EXIT_STATUS')

        displayReport(self, header, toPrint, self.xml)

        return

    def PrintReport_(self):
        """
        Print the total number of jobs and, for every wrapper exit code
        recorded by compute(), how many jobs ended with that code and which
        ones.
        """
        jobs = common._db.nJobs('list')

        # Single-argument print(...) produces the same output whether it is
        # parsed as the Python 2 statement or as the Python 3 function.
        print('')
        print(">>>>>>>>> %i Total Jobs " % (len(jobs)))
        print('')

        # sorted() makes the order of the summary deterministic; every entry
        # of wrapErrorList is a string, so the values are always comparable.
        for code in sorted(set(self.wrapErrorList)):
            list_ID = common._db.queryAttrRunJob({'wrapperReturnCode': code}, 'jobId')
            if list_ID:
                print(">>>>>>>>> %i Jobs with Wrapper Exit Code : %s " % (len(list_ID), str(code)))
                print(" List of jobs: %s" % self.readableList(list_ID))
                print(" ")

    def readableList(self, rawList):
        """
        Return rawList as a compact comma-separated string in which runs of
        consecutive values are collapsed into "first-last".

        Example: [1, 2, 3, 5, 7, 8] -> '1-3,5,7-8'.
        The input order is preserved (the list is not sorted) and an empty
        list yields an empty string. The elements must support "+ 1".
        """
        if not rawList:
            return ''

        def span(first, last):
            # A run of length one is printed as a single value.
            if first == last:
                return str(first)
            return str(first) + '-' + str(last)

        pieces = []
        first = last = rawList[0]
        for value in rawList[1:]:
            if value == last + 1:
                # Still inside the current run: just extend it.
                last = value
            else:
                pieces.append(span(first, last))
                first = last = value
        pieces.append(span(first, last))

        return ','.join(pieces)

    def dataToDash(self, job, id, taskId, dest, jobStatus):
        """
        Send the status of one job to the monitoring sink.

        job       -- job object; its runningJob mapping provides
                     'schedulerId', 'statusReason', 'startTime' and, for
                     schedulers other than CONDOR_G/lsf/caf, 'service'
        id        -- job identifier, used as prefix of the monitoring jobId
        taskId    -- task identifier reported as 'taskId'
        dest      -- destination reported as 'StatusDestination'
        jobStatus -- status string reported as 'StatusValue'

        The layout of the monitoring jobId depends on the scheduler name.
        'RBname' is only included in the message when a value is available.
        """
        running = job.runningJob
        jid = running['schedulerId']
        job_status_reason = str(running['statusReason'])
        job_last_time = str(running['startTime'])

        scheduler_name = common.scheduler.name()
        if scheduler_name.upper() == 'CONDOR_G':
            WMS = 'OSG'
            self.hash = makeCksum(common.work_space.cfgFileName())
            jobId = str(id) + '_' + self.hash + '_' + str(jid)
            common.logger.debug(5, 'JobID for ML monitoring is created for CONDOR_G scheduler:' + jobId)
        elif scheduler_name in ['lsf', 'caf']:
            WMS = scheduler_name
            # str.replace replaces the deprecated string.replace() function.
            jobId = str(id) + "_https://" + scheduler_name + ":/" + str(jid) + "-" + taskId.replace("_", "-")
            common.logger.debug(5, 'JobID for ML monitoring is created for Local scheduler:' + jobId)
        else:
            jobId = str(id) + '_' + str(jid)
            WMS = running['service']
            common.logger.debug(5, 'JobID for ML monitoring is created for gLite scheduler:' + jobId)

        common.logger.debug(5, "sending info to ML")
        params = {'taskId': taskId,
                  'jobId': jobId,
                  'sid': str(jid),
                  'StatusValueReason': job_status_reason,
                  'StatusValue': jobStatus,
                  'StatusEnterTime': job_last_time,
                  'StatusDestination': dest}
        if WMS is not None:
            params['RBname'] = WMS
        common.logger.debug(5, str(params))
        common.apmon.sendToML(params)

        return

    def joinIntArray_(self, array):
        """
        Return the items of array converted with str() and joined by commas.

        An empty array yields an empty string.
        """
        return ','.join(str(item) for item in array)
|
160 |
slacapra |
1.1 |
|