15 |
|
# init client server params... |
16 |
|
CliServerParams(self) |
17 |
|
|
18 |
+ |
self.copyTout= setLcgTimeout() |
19 |
+ |
if common.scheduler.name().upper() in ['LSF', 'CAF']: |
20 |
+ |
self.copyTout= ' ' |
21 |
+ |
|
22 |
|
if self.storage_path[0]!='/': |
23 |
|
self.storage_path = '/'+self.storage_path |
24 |
|
|
39 |
|
try: |
40 |
|
seEl = SElement(self.storage_name, self.storage_proto, self.storage_port) |
41 |
|
except Exception, ex: |
42 |
< |
common.logger.debug(1, str(ex)) |
42 |
> |
common.logger.debug( str(ex)) |
43 |
|
msg = "ERROR: Unable to create SE source interface \n" |
44 |
|
raise CrabException(msg) |
45 |
|
try: |
46 |
|
loc = SElement("localhost", "local") |
47 |
|
except Exception, ex: |
48 |
< |
common.logger.debug(1, str(ex)) |
48 |
> |
common.logger.debug( str(ex)) |
49 |
|
msg = "ERROR: Unable to create destination interface \n" |
50 |
|
raise CrabException(msg) |
51 |
|
|
55 |
|
## get the list of jobs to get logging.info skimmed by failed status |
56 |
|
logginable = self.skimDeadList() |
57 |
|
|
58 |
< |
## iter over each asked job and print warning if not in skimmed list |
59 |
< |
for id in self.nj_list: |
60 |
< |
if id not in self.all_jobs: |
61 |
< |
common.logger.message('Warning: job # ' + str(id) + ' does not exist! Not possible to ask for postMortem ') |
62 |
< |
continue |
63 |
< |
elif id in logginable: |
64 |
< |
fname = self.fname_base + str(id) + '.LoggingInfo' |
65 |
< |
if os.path.exists(fname): |
66 |
< |
common.logger.message('Logging info for job ' + str(id) + ' already present in '+fname+'\nRemove it for update') |
58 |
> |
if self.storage_proto in ['globus']: |
59 |
> |
for id in self.nj_list: |
60 |
> |
if id not in self.all_jobs: |
61 |
> |
common.logger.info('Warning: job # ' + str(id) + ' does not exist! Not possible to ask for postMortem ') |
62 |
> |
elif id not in logginable: |
63 |
> |
common.logger.info('Warning: job # ' + str(id) + ' not killed or aborted! Will get loggingInfo manually ') |
64 |
> |
PostMortem.collectOneLogging(self,id) |
65 |
> |
# construct a list of absolute paths of input files |
66 |
> |
# and the destinations to copy them to |
67 |
> |
sourcesList = [] |
68 |
> |
destsList = [] |
69 |
> |
self.taskuuid = str(common._db.queryTask('name')) |
70 |
> |
common.logger.debug( "Starting globus retrieval for task name: " + self.taskuuid) |
71 |
> |
remotedir = os.path.join(self.storage_path, self.taskuuid) |
72 |
> |
for i in logginable: |
73 |
> |
remotelog = remotedir + '/loggingInfo_'+str(i)+'.log' |
74 |
> |
sourcesList.append(remotelog) |
75 |
> |
fname = self.fname_base + str(i) + '.LoggingInfo' |
76 |
> |
destsList.append(fname) |
77 |
> |
|
78 |
> |
# try to do the copy |
79 |
> |
copy_res = None |
80 |
> |
try: |
81 |
> |
copy_res = sbi.copy( sourcesList, destsList, opt=self.copyTout) |
82 |
> |
except Exception, ex: |
83 |
> |
msg = "WARNING: Unable to retrieve logging" #ainfo file %s \n" % osbFiles[i] |
84 |
> |
msg += str(ex) |
85 |
> |
common.logger.debug(msg) |
86 |
> |
import traceback |
87 |
> |
common.logger.debug( str(traceback.format_exc()) ) |
88 |
> |
if copy_res is not None: |
89 |
> |
## evaluating copy results |
90 |
> |
copy_err_list = [] |
91 |
> |
count = 0 |
92 |
> |
for ll in map(None, copy_res, sourcesList): |
93 |
> |
exitcode = int(ll[0][0]) |
94 |
> |
if exitcode == 0: |
95 |
> |
## decode logging info |
96 |
> |
fl = open(destsList[count], 'r') |
97 |
> |
out = "".join(fl.readlines()) |
98 |
> |
fl.close() |
99 |
> |
reason = self.decodeLogging(out) |
100 |
> |
common.logger.info('Logging info for job '+ str(logginable[count]) +': '+str(reason)+'\n written to '+str(destsList[count])+' \n' ) |
101 |
> |
else: |
102 |
> |
common.logger.info('Logging info for job '+ str(logginable[count]) +' not retrieved. Tring to get loggingInfo manually') |
103 |
> |
PostMortem.collectOneLogging(self,logginable[count]) |
104 |
> |
count += 1 |
105 |
> |
else: |
106 |
> |
## iter over each asked job and print warning if not in skimmed list |
107 |
> |
for id in self.nj_list: |
108 |
> |
if id not in self.all_jobs: |
109 |
> |
common.logger.info('Warning: job # ' + str(id) + ' does not exist! Not possible to ask for postMortem ') |
110 |
|
continue |
111 |
< |
## retrieving & processing logging info |
112 |
< |
if self.retrieveFile( sbi, id, fname): |
113 |
< |
## decode logging info |
114 |
< |
fl = open(fname, 'r') |
115 |
< |
out = "".join(fl.readlines()) |
116 |
< |
fl.close() |
117 |
< |
reason = self.decodeLogging(out) |
118 |
< |
common.logger.message('Logging info for job '+ str(id) +': '+str(reason)+'\n written to '+str(fname)+' \n' ) |
111 |
> |
elif id in logginable: |
112 |
> |
fname = self.fname_base + str(id) + '.LoggingInfo' |
113 |
> |
if os.path.exists(fname): |
114 |
> |
common.logger.info('Logging info for job ' + str(id) + ' already present in '+fname+'\nRemove it for update') |
115 |
> |
continue |
116 |
> |
## retrieving & processing logging info |
117 |
> |
if self.retrieveFile( sbi, id, fname): |
118 |
> |
## decode logging info |
119 |
> |
fl = open(fname, 'r') |
120 |
> |
out = "".join(fl.readlines()) |
121 |
> |
fl.close() |
122 |
> |
reason = self.decodeLogging(out) |
123 |
> |
common.logger.info('Logging info for job '+ str(id) +': '+str(reason)+'\n written to '+str(fname)+' \n' ) |
124 |
> |
else: |
125 |
> |
common.logger.info('Logging info for job '+ str(id) +' not retrieved. Tring to get loggingInfo manually') |
126 |
> |
PostMortem.collectOneLogging(self,id) |
127 |
|
else: |
128 |
< |
common.logger.message('Logging info for job '+ str(id) +' not retrieved') |
129 |
< |
else: |
75 |
< |
common.logger.message('Warning: job # ' + str(id) + ' not killed or aborted! Not possible to ask for postMortem ') |
128 |
> |
common.logger.info('Warning: job # ' + str(id) + ' not killed or aborted! Will get loggingInfo manually ') |
129 |
> |
PostMortem.collectOneLogging(self,id) |
130 |
|
return |
131 |
|
|
132 |
|
|
149 |
|
retrieves logging.info file from the server storage area |
150 |
|
""" |
151 |
|
self.taskuuid = str(common._db.queryTask('name')) |
152 |
< |
common.logger.debug(3, "Task name: " + self.taskuuid) |
152 |
> |
common.logger.debug( "Task name: " + self.taskuuid) |
153 |
|
|
154 |
|
# full remote dir |
155 |
|
remotedir = os.path.join(self.storage_path, self.taskuuid) |
156 |
|
remotelog = remotedir + '/loggingInfo_'+str(jobid)+'.log' |
157 |
|
|
158 |
< |
common.logger.message("Starting retrieving logging-info from server " \ |
158 |
> |
common.logger.info("Starting retrieving logging-info from server " \ |
159 |
|
+ str(self.storage_name) + " for job " \ |
160 |
|
+ str(jobid) + "...") |
161 |
|
|
162 |
|
# retrieve logging info from storage |
163 |
< |
common.logger.debug(1, "retrieving "+ str(remotelog) +" to "+ str(destlog) ) |
163 |
> |
common.logger.debug( "retrieving "+ str(remotelog) +" to "+ str(destlog) ) |
164 |
|
try: |
165 |
|
sbi.copy( remotelog, destlog) |
166 |
|
except Exception, ex: |
167 |
|
msg = "WARNING: Unable to retrieve logging-info file %s \n"%remotelog |
168 |
|
msg += str(ex) |
169 |
< |
common.logger.debug(1,msg) |
169 |
> |
common.logger.debug(msg) |
170 |
|
return False |
171 |
|
# cleaning remote logging info file |
172 |
|
try: |
173 |
< |
common.logger.debug(5, "Cleaning remote file [%s] " + str(remotelog) ) |
173 |
> |
common.logger.debug( "Cleaning remote file [%s] " %( str(remotelog) ) ) |
174 |
|
sbi.delete(remotelog) |
175 |
|
except Exception, ex: |
176 |
|
msg = "WARNING: Unable to clean remote logging-info file %s \n"%remotelog |
177 |
|
msg += str(ex) |
178 |
< |
common.logger.debug(5,msg) |
178 |
> |
common.logger.debug(msg) |
179 |
|
return True |