1 |
#!/bin/env python
|
2 |
from ConfigParser import ConfigParser
|
3 |
import string
|
4 |
import html as h
|
5 |
import os
|
6 |
import os.path
|
7 |
# Absolute directory that contains this script (symbolic links resolved).
# The configuration file lfn.cfg is looked up relative to it, so the tool
# can be started from any working directory.
_script_path = os.path.realpath(__file__)
dir = os.path.dirname(_script_path)
|
9 |
|
10 |
# Help text printed for --help and whenever the command line is unusable.
Usage = """
loadLFN.py
Options:
--file file that does contain list of lFNs
--out_dir output directory
--verbose verbose
--skip string that has to be skipped (can have multiple values comma separeted)
--help help
"""
|
19 |
|
20 |
|
21 |
class LfnType:
    """Accumulator for the LFNs of one input list, grouped by /store area.

    Attributes (all start empty and are filled by loadData):
      info_data, info_mc, info_relval -- nested dicts
          era -> data tier -> dataset path -> list of LFNs
      info_user, info_group, info_results -- dict name -> list of LFNs
      info_err -- dict area -> directory path -> list of LFNs, for areas
          that are not listed in the configuration
      tmp_list, dqm_list, lumi_list -- flat lists of LFNs found under
          /store/temp, /store/dqm and /store/lumi
    """

    def __init__(self):
        # Every attribute gets its own fresh container so that instances
        # never share state.
        dict_attributes = ('info_data', 'info_mc', 'info_relval',
                           'info_user', 'info_group', 'info_results',
                           'info_err')
        list_attributes = ('tmp_list', 'dqm_list', 'lumi_list')
        for attribute in dict_attributes:
            setattr(self, attribute, {})
        for attribute in list_attributes:
            setattr(self, attribute, [])
|
33 |
|
34 |
class ConfigFile(ConfigParser):
    """ConfigParser that loads its configuration file on construction.

    path -- configuration file to read.  With the default (None) nothing
            is read and the parser starts empty.

    ConfigParser.read() silently ignores files it cannot open, so a wrong
    path only shows up later as a missing section/option on get().
    """

    def __init__(self, path=None):
        ConfigParser.__init__(self)
        # read() iterates over its argument, so handing it the default
        # None would raise TypeError; skip the call in that case.
        if path is not None:
            self.read(path)
|
38 |
|
39 |
|
40 |
|
41 |
def loadData(lfn_list,lfn,cfg,values_toskip=None):
    """Classify every LFN of lfn_list by its /store/<area> and store it in lfn.

    lfn_list      -- iterable of LFN strings, normally the lines of the
                     input file (a trailing line terminator is stripped)
    lfn           -- LfnType instance, filled in place
    cfg           -- configuration object; the options DATA/data,
                     TIER/tiers, TIER/PD_categories and
                     TIER/data_categories are read through cfg.get()
    values_toskip -- optional list of substrings; a line containing any
                     of them is ignored

    Returns a dict that maps an error kind ('generic', 'type',
    'root_file', 'file') to the list of messages collected for it.
    """
    data = loadList(cfg.get('DATA', 'data'))
    tiers = loadList(cfg.get('TIER', 'tiers'))
    PD_categories = loadDict(cfg.get('TIER', 'PD_categories'))
    data_categories = loadDict(cfg.get('TIER', 'data_categories'))
    msg_err = {'generic': [], 'type': [], 'root_file': [], 'file': []}

    # The three dataset areas share the same nesting:
    # era -> data tier -> dataset path -> list of files.
    dataset_maps = {
        'data': lfn.info_data,
        'mc': lfn.info_mc,
        'relval': lfn.info_relval,
    }

    for line in lfn_list:

        # Empty patterns (e.g. from "--skip a,,b") are ignored, otherwise
        # they would match every single line.
        if values_toskip and any(v and v in line for v in values_toskip):
            continue

        # Strip only the line terminator; slicing off the last character
        # would damage a final line that has no trailing newline.
        path = line.rstrip('\r\n')
        parts = path.split('/')
        if len(parts) < 6:
            msg_err['generic'].append(
                "malformed file (too short %i): %s" % (len(parts), path))
            continue
        # Drop the empty component produced by the leading '/'.
        if '' in parts:
            parts.remove('')
        area = parts[1]

        if parts[0] != "store":
            msg_err['type'].append(
                "malformed file (doesn't contain store): %s" % (path))
            continue

        # Areas missing from the configuration get their own error table.
        unknown_area = area not in data
        if unknown_area:
            lfn.info_err.setdefault(area, {})

        # A non-root file is only reported; it is still classified below.
        if not parts[-1].endswith('root'):
            msg_err['root_file'].append(
                "malformed file (not a root file) :%s" % path)

        if area == 'lumi':
            lfn.lumi_list.append(path)
        elif area == 'dqm':
            lfn.dqm_list.append(path)
        elif area == 'temp':
            lfn.tmp_list.append(path)
        elif area == 'group':
            lfn.info_group.setdefault(parts[2], []).append(path)
        elif area == 'results':
            # The original inner test compared against 'result', so files
            # under /store/results were never recorded.
            lfn.info_results.setdefault(parts[2], []).append(path)
        elif area == 'user':
            lfn.info_user.setdefault(parts[2], []).append(path)
        elif area in dataset_maps:
            data_tier = parts[4].strip()
            if data_tier not in tiers:
                msg_err['file'].append(
                    "malformed file (not valid data tier %s): %s"
                    % (data_tier, path))
                continue

            # Extract the acquisition era: the text before the first '-',
            # or before the first '_' when there is no '-'.
            era_parts = parts[2].split('-')
            if len(era_parts) < 2:
                era_parts = parts[2].split('_')
            era = era_parts[0]

            # Map the era onto its configured category.  There is no break
            # on purpose (unchanged behaviour): every category is tested
            # and the last match wins.
            for pd in PD_categories.keys():
                if era in PD_categories[pd]:
                    era = pd
                else:
                    for prefix in PD_categories[pd]:
                        if era.startswith(prefix):
                            era = pd

            # Map the data tier onto its configured category.
            for dt in sorted(data_categories.keys()):
                if data_tier in data_categories[dt]:
                    data_tier = dt

            # Dataset path = the first six components of the LFN.
            g_lfn = '/%s' % '/'.join(parts[0:6])

            by_tier = dataset_maps[area].setdefault(era, {})
            by_dataset = by_tier.setdefault(data_tier, {})
            by_dataset.setdefault(g_lfn, []).append(path)
        elif unknown_area:
            # Group unknown files by their path minus the last two
            # components.
            key = '/%s' % '/'.join(parts[:-2])
            lfn.info_err[area].setdefault(key, []).append(path)

    return msg_err
|
186 |
|
187 |
|
188 |
def printdata(cfg,lfn,dir,verbose):
    """Write one report per /store area found in lfn into directory dir.

    cfg     -- configuration object handed on to printMultipleDict
    lfn     -- LfnType instance holding the classified LFNs
    dir     -- output directory; error reports go to dir + "/err"
    verbose -- passed to the HTML writers to select detailed output
    """
    # Era / data-tier tables.
    nested_reports = (
        (lfn.info_data, "/data.html", '/store/data'),
        (lfn.info_mc, "/mc.html", '/store/mc'),
        (lfn.info_relval, "/relval.html", '/store/relval'),
    )
    for table, suffix, title in nested_reports:
        printMultipleDict(cfg, table, dir + suffix, title, verbose)

    # Simple name -> files tables.
    flat_reports = (
        (lfn.info_group, "/group.html", '/store/group'),
        (lfn.info_results, "/result.html", '/store/result'),
        (lfn.info_user, "/user.html", '/store/user'),
    )
    for table, suffix, title in flat_reports:
        printDict(table, dir + suffix, title, verbose)

    # Plain text lists, written only when there is something to list.
    text_reports = (
        (lfn.tmp_list, "/temp.txt", "/store/temp"),
        (lfn.dqm_list, "/dqm.txt", "/store/dqm"),
        (lfn.lumi_list, "/lumi.txt", "/store/lumi"),
    )
    for entries, suffix, title in text_reports:
        if entries:
            printList(entries, dir + suffix, title)

    # One HTML report per area that is missing from the configuration.
    dir_err = dir + "/err"
    for area, table in lfn.info_err.items():
        printDict(table, "%s/%s.html" % (dir_err, area), '/store/%s' % area, verbose)
|
203 |
|
204 |
|
205 |
|
206 |
def printList(lst,file,title):
    """Write title followed by every item of lst to a text file.

    lst   -- iterable of strings, written one per line
    file  -- path of the output file (created or truncated)
    title -- first line of the file
    """
    # The context manager closes the file even when a write fails.
    with open(file, 'w') as fd:
        fd.write(title + '\n')
        fd.writelines(item + '\n' for item in lst)
|
217 |
|
218 |
def printDict(info,file,title,verbose=None):
    """Write an HTML table with one row per key of info.

    info    -- dict mapping a name to the list of its files
    file    -- path of the HTML file; nothing is written when info is
               empty
    title   -- text written before the table
    verbose -- when true each row lists the files themselves, otherwise
               only their number

    The last row ("TOT") shows the total number of files.
    """
    if not info:
        return

    # Count the files.  The original summed len() of the key strings,
    # which reported the length of the names instead.
    total = sum(len(files) for files in info.values())

    # The context manager closes the file even when a write fails.
    with open(file, 'w') as fd:
        fd.write(h.head)
        fd.write(h.body)
        fd.write(title)
        fd.write(h.table)

        for name in sorted(info.keys()):
            fd.write(h.tr)
            fd.write(h.td % name)
            fd.write('<td>')
            if verbose:
                fd.write(h.inner_table)
                for entry in info[name]:
                    fd.write(h.tr)
                    fd.write('<td nowrap>%s</td>' % entry)
                    fd.write(h.end_tr)
                fd.write(h.end_table)
            else:
                fd.write(h.td % ("%i" % len(info[name])))
            fd.write('</td>')
            fd.write(h.end_tr)

        # Summary row.
        fd.write(h.tr)
        fd.write(h.td % 'TOT')
        fd.write(h.td % ' ')
        fd.write(h.td % ('%i' % total))
        fd.write(h.end_tr)

        fd.write(h.end_table)
        fd.write(h.end_body)
|
256 |
|
257 |
|
258 |
def printMultipleDict(cfg,info,file,title,verbose=None) :
    """Write an HTML table of file counts per era and data category.

    cfg     -- configuration object; TIER/sorted_datacategories gives the
               columns and their order
    info    -- nested dict era -> data category -> dataset -> list of files
    file    -- path of the HTML file; nothing is written when info is
               empty
    title   -- text written before the table
    verbose -- when true each cell lists its datasets with their file
               count, otherwise only the cell total is shown

    Rows are written in sorted era order and end with a "TOT" column.
    Categories present in info but missing from sorted_datacategories
    are not shown.  info itself is left unmodified.
    """
    sorted_data_categories = loadList(cfg.get('TIER', 'sorted_datacategories'))

    if not info:
        return

    # The context manager closes the file even when a write fails.
    with open(file, 'w') as fd:
        fd.write(h.head)
        fd.write(h.body)
        fd.write(title)
        fd.write(h.table)

        # Header: empty corner cell, one column per category, then TOT.
        fd.write(h.th % ' ')
        for category in sorted_data_categories:
            fd.write(h.th % category)
        fd.write(h.th % 'TOT')

        for era in sorted(info):
            by_category = info[era]
            tot = 0

            fd.write(h.tr)
            fd.write(h.td % era)
            for category in sorted_data_categories:
                # get() rather than setdefault(): the placeholder for a
                # missing category must not leak into the caller's data.
                datasets = by_category.get(category, {'': []})
                count = sum(len(files) for files in datasets.values())

                fd.write('<td>')
                if verbose:
                    fd.write(h.inner_table)
                    for dataset, files in sorted(datasets.items()):
                        fd.write(h.tr)
                        el = '<b>%i</b> %s' % (len(files), dataset)
                        fd.write('<td nowrap>%s</td>' % el)
                        fd.write(h.end_tr)
                    fd.write(h.end_table)
                else:
                    fd.write('<b>%i</b>' % count)
                tot = tot + count
                fd.write('</td>')

            fd.write(h.td % ('%i' % tot))
            fd.write(h.end_tr)

        fd.write(h.end_table)
        fd.write(h.end_body)
|
304 |
|
305 |
|
306 |
|
307 |
def loadList(lst):
    """Turn a comma-separated string into a list of its non-empty items.

    lst -- string such as "a, b,,c", or None

    Returns the items with surrounding whitespace removed; empty items
    are dropped.  None yields an empty list.
    """
    if lst is None:
        return []
    # str methods replace the deprecated string.split / string.strip
    # module functions.
    stripped = [item.strip() for item in lst.split(",")]
    return [item for item in stripped if item != ""]
|
319 |
|
320 |
|
321 |
def loadDict(lst):
    """Evaluate the textual form of a dictionary taken from the configuration.

    lst -- string holding a Python expression, or None

    Returns the value of the expression, or an empty dict for None.
    """
    if lst is None:
        return {}
    # NOTE(review): eval() runs arbitrary code found in the configuration
    # file.  If the values are always plain literals, ast.literal_eval
    # would be the safe replacement -- confirm against the real lfn.cfg.
    return eval(lst)
|
328 |
|
329 |
def main(opts, args):
    """Run the tool: read the LFN list, classify it, write the reports.

    opts -- list of (option, value) pairs as returned by getopt.getopt
    args -- remaining positional arguments (unused)

    Prints the usage text and exits with status 1 on --help or when
    --file / --out_dir is missing; also exits with status 1 when the
    output directory does not exist.
    """
    # Imported here so main() also works when this file is imported as a
    # module; at file level sys is only imported under the __main__ guard.
    import sys

    input_file = None
    verbose = False
    out_dir = None
    values_toskip = None

    for opt, val in opts:
        if opt == '--help':
            print(Usage)
            sys.exit(1)
        elif opt == '--file':
            input_file = val
        elif opt == '--verbose':
            verbose = True
        elif opt == '--out_dir':
            out_dir = val
        elif opt == '--skip':
            values_toskip = val.split(",")

    if not input_file or not out_dir:
        print(Usage)
        sys.exit(1)

    # Expand '~' before any filesystem check; the original expanded it
    # only after testing that out_dir exists, so an unexpanded
    # "--out_dir=~/x" was always rejected.
    input_file = os.path.expanduser(input_file)
    out_dir = os.path.expanduser(out_dir)

    if not os.path.exists(out_dir):
        print("%s doesn't exist" % out_dir)
        sys.exit(1)

    if not os.path.exists(out_dir + '/err'):
        os.makedirs(out_dir + '/err')
        print("created %s/err" % out_dir)

    # The configuration file sits next to this script.
    cfg = ConfigFile(path='%s/lfn.cfg' % dir)

    with open(input_file, "r") as fs:
        lines = fs.readlines()

    lfn = LfnType()

    msg_err = loadData(lines, lfn, cfg, values_toskip)

    # One <kind>.err file per non-empty error category.
    for kind, messages in msg_err.items():
        if messages:
            with open("%s/err/%s.err" % (out_dir, kind), 'w') as fd:
                for message in messages:
                    fd.write(message + '\n')

    printdata(cfg, lfn, out_dir, verbose)
|
382 |
|
383 |
|
384 |
if __name__ == '__main__':
    import sys
    import getopt

    # Only long options are accepted; a trailing '=' marks the options
    # that take a value.
    long_options = ["out_dir=", "file=", "verbose", "skip=", "help"]

    try:
        opts, args = getopt.getopt(sys.argv[1:], "", long_options)
    except getopt.GetoptError as msg:
        # Unknown option or missing value: show the problem and the usage.
        print("")
        print(msg)
        print("")
        print(Usage)
        sys.exit(1)

    # Without any option there is nothing to do.
    if not opts:
        print("")
        print(Usage)
        sys.exit(1)

    main(opts, args)
|