1 |
#!/usr/bin/env python
|
2 |
"""
|
3 |
_ModifyJobReport.py
|
4 |
|
5 |
Example of how to use the FwkJobRep package to update a job report after processing.
|
6 |
|
7 |
|
8 |
"""
|
9 |
import os, string
|
10 |
import sys
|
11 |
import popen2
|
12 |
|
13 |
from ProdCommon.FwkJobRep.ReportParser import readJobReport
|
14 |
|
15 |
|
16 |
def readCksum(filename):
    """
    _readCksum_

    Run the cksum command on a file and return the checksum value.

    The command is started from an argument list (no shell), so a file
    name containing spaces or shell metacharacters is handed to cksum
    unchanged.  stderr is merged into stdout.

    Returns the first whitespace-separated field of the cksum output as a
    string, or None when cksum cannot be started, exits with a non-zero
    status, or prints nothing.

    """
    # Function-scope import: the file header does not import subprocess.
    import subprocess

    try:
        proc = subprocess.Popen(["cksum", filename],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT,
                                universal_newlines=True)
    except OSError:
        # cksum is missing or not executable
        return None

    # communicate() drains the pipe and blocks until the child exits,
    # replacing the former busy loop on poll().
    content = proc.communicate()[0]
    if proc.returncode:
        return None

    fields = content.split()
    if not fields:
        return None
    value = fields[0]
    # Single parenthesised argument: same output as the former
    # 'print "checksum = ", value' statement.
    print("checksum =  %s" % value)
    return value
|
34 |
|
35 |
|
36 |
def fileSize(filename):
    """
    _fileSize_

    Return the size of a file in bytes, as reported by os.stat, and print
    it to stdout.

    The file is stat'ed only once so the printed and the returned value
    always agree.  os.stat raises OSError if the file cannot be stat'ed.

    """
    # st_size is the named form of the former tuple index 6
    size = os.stat(filename).st_size
    print("(size) os.stat(filename)[6] =  %s" % size)
    return size
|
45 |
|
46 |
|
47 |
def addFileStats(file):
    """
    _addFileStats_

    Store the size and the cksum checksum of a file entry.

    The entry's 'PFN' is used as the path on disk: its size goes into
    file['Size'] and the readCksum() result is registered through
    file.addChecksum('cksum', ...).  The value registered is whatever
    readCksum() returned, which is None when the checksum failed.

    """
    file['Size'] = fileSize(file['PFN'])
    file.addChecksum('cksum', readCksum(file['PFN']))
|
61 |
|
62 |
|
63 |
def modifyFile(file):
    """
    _modifyFile_

    Build the new PFN and LFN of a file entry and store them with
    updatePFN / updateLFN.

    The current PFN is cut at its first '.root': the part before it is the
    prefix, the remainder (from '.root' onwards, or empty when the PFN
    contains no '.root') is the suffix.  The new names are

        PFN = diz['se_path'] + prefix + '_' + diz['n_job'] + suffix
        LFN = diz['for_lfn'] + prefix + '_' + diz['n_job'] + suffix

    'diz' is the module-level dictionary of command line options.

    """
    pfn = str(file['PFN'])
    pref = pfn.split('.root')[0]
    # Slice instead of splitting on the prefix: splitting raised ValueError
    # for an empty prefix and cut the suffix short whenever the prefix text
    # occurred again later in the PFN.
    suff = pfn[len(pref):]

    ### FEDE changing se_path with the endpoint
    newPfn = diz['se_path'] + pref + '_' + diz['n_job'] + suff
    print("newPfn =  %s" % newPfn)

    newLfn = diz['for_lfn'] + pref + '_' + diz['n_job'] + suff
    print("newLfn =  %s" % newLfn)

    # Both helpers are keyed on the entry's current LFN, so the PFN must be
    # updated before the LFN is replaced.
    updatePFN(file, file['LFN'], newPfn)
    updateLFN(file, file['LFN'], newLfn)
    return
|
88 |
|
89 |
|
90 |
def updatePFN(file, lfn, newPFN):
    """
    _updatePFN_

    Set a new PFN on a file entry after a stage out to some SE.

    Nothing happens unless file['LFN'] equals lfn.  On a match the entry
    gets file['PFN'] = newPFN and file['SEName'] taken from the
    module-level diz['se_name'].  Always returns None.
    """
    if file['LFN'] == lfn:
        file['PFN'] = newPFN
        file['SEName'] = diz['se_name']
|
102 |
|
103 |
|
104 |
def updateLFN(file, lfn, newLFN):
    """
    _updateLFN_

    Replace the LFN of a file entry.

    The entry is only touched when its current file['LFN'] equals lfn; in
    that case file['LFN'] becomes newLFN.  Always returns None.
    """
    if file['LFN'] == lfn:
        file['LFN'] = newLFN
|
114 |
|
115 |
|
116 |
if __name__ == '__main__':
|
117 |
|
118 |
# Example: Load the report, update the file stats, pretend to do a stage out
|
119 |
# and update the information for the stage out
|
120 |
|
121 |
|
122 |
L = sys.argv[1:]
|
123 |
if len(L) < 21:
|
124 |
print "Error: wrong number of arguments passed to the ModifyJobreport. Please check your script"
|
125 |
sys.exit(1)
|
126 |
diz={}
|
127 |
|
128 |
i = 0
|
129 |
while i < len(L):
|
130 |
diz[L[i]] = L[i+1]
|
131 |
i = i + 2
|
132 |
|
133 |
if diz.has_key('fjr'):
|
134 |
inputReport = diz['fjr']
|
135 |
reports = readJobReport(inputReport)
|
136 |
|
137 |
# report is an instance of FwkJobRep.FwkJobReport class
|
138 |
# can be N in a file, so a list is always returned
|
139 |
# by for readJobReport, here I am assuming just one report per file for simplicity
|
140 |
try:
|
141 |
report = reports[-1]
|
142 |
except IndexError:
|
143 |
print "Error: No file to publish in xml file"
|
144 |
sys.exit(1)
|
145 |
else:
|
146 |
print "no crab fjr found"
|
147 |
sys.exit(1)
|
148 |
|
149 |
|
150 |
# ARGs parameters
|
151 |
if diz.has_key('n_job'):
|
152 |
n_job = diz['n_job']
|
153 |
else:
|
154 |
print "it is necessary to specify the job number"
|
155 |
sys.exit(1)
|
156 |
|
157 |
if diz.has_key('UserProcessedDataset'):
|
158 |
UserProcessedDataset = diz['UserProcessedDataset']
|
159 |
else:
|
160 |
UserProcessedDataset=''
|
161 |
print "UserProcessedDataset = ", UserProcessedDataset
|
162 |
|
163 |
#### Adding AnalysisFile ####
|
164 |
if (len(report.files) == 0) and (len(report.analysisFiles) == 0):
|
165 |
print "no EDM_output file or NO_EDM_output to modify"
|
166 |
print "Adding a no EDM_output file"
|
167 |
files=str.split(str(diz['file_list']), ',')
|
168 |
#print "files = ", files
|
169 |
for file in files:
|
170 |
split = str.split(str(file), '/')
|
171 |
if (len(split) > 0):
|
172 |
file_name = split[len(split)-1]
|
173 |
else:
|
174 |
file_name = file
|
175 |
|
176 |
report.newAnalysisFile()
|
177 |
for aFile in report.analysisFiles:
|
178 |
if (aFile['SEName'] == None):
|
179 |
aFile['SEName']=diz['se_name']
|
180 |
if (aFile['LFN'] == None):
|
181 |
aFile['LFN']=diz['for_lfn']+file_name
|
182 |
if (aFile['PFN'] == None):
|
183 |
aFile['PFN']=diz['se_path']+file_name
|
184 |
report.save()
|
185 |
|
186 |
report.write("NewFrameworkJobReport.xml")
|
187 |
else:
|
188 |
if (len(report.files) != 0):
|
189 |
for f in report.files:
|
190 |
if (string.find(f['PFN'], ':') != -1):
|
191 |
tmp_path = string.split(f['PFN'], ':')
|
192 |
f['PFN'] = tmp_path[1]
|
193 |
if not os.path.exists(f['PFN']):
|
194 |
print "Error: Cannot find file: %s " % f['PFN']
|
195 |
sys.exit(1)
|
196 |
#Generate per file stats
|
197 |
addFileStats(f)
|
198 |
|
199 |
datasetinfo=f.newDataset()
|
200 |
datasetinfo['PrimaryDataset'] = diz['PrimaryDataset']
|
201 |
datasetinfo['DataTier'] = "USER"
|
202 |
datasetinfo['ProcessedDataset'] = UserProcessedDataset
|
203 |
datasetinfo['ApplicationFamily'] = diz['ApplicationFamily']
|
204 |
datasetinfo['ApplicationName'] = diz['ApplicationName']
|
205 |
datasetinfo['ApplicationVersion'] = diz['cmssw_version']
|
206 |
datasetinfo['PSetHash'] = diz['psethash']
|
207 |
datasetinfo['PSetContent'] = "TOBEADDED"
|
208 |
### to check if the job output is composed by more files
|
209 |
modifyFile(f)
|
210 |
|
211 |
if (len(report.analysisFiles) != 0):
|
212 |
for aFile in report.analysisFiles:
|
213 |
aFile['PFN'] = os.path.basename(aFile['FileName'])
|
214 |
modifyFile(aFile)
|
215 |
|
216 |
# After modifying the report, you can then save it to a file.
|
217 |
report.write("NewFrameworkJobReport.xml")
|
218 |
|
219 |
|
220 |
|