1 |
#!/usr/bin/python
|
2 |
|
3 |
#
|
4 |
# This is a wrapper script around the hadd tool.
|
5 |
# - hadd tool merges multiple root files with same tree
|
6 |
# but is limited to 1000 files
|
7 |
# - this script can merge up to 1000 * 1000 root files
|
8 |
#
|
9 |
# Todo:
|
10 |
# - extend to merge more files.
|
11 |
#
|
12 |
|
13 |
import subprocess
|
14 |
import sys
|
15 |
import os
|
16 |
|
17 |
def callHadd(outfile, filelist):
    """Merge the files in ``filelist`` into ``outfile`` by running ``hadd``.

    Parameters:
        outfile  -- path of the file to create; it must not exist yet.
        filelist -- iterable of input file paths handed to ``hadd``.

    Exits the process (``SystemExit`` with a message, i.e. status 1) when
    ``outfile`` already exists, when ``hadd`` cannot be started, or when
    ``hadd`` returns a non-zero status.
    """
    if os.path.exists(outfile):
        sys.exit('ERROR: file  %s  exists already.' % outfile)

    # Pass an argument list instead of a shell string: file names that
    # contain spaces or shell metacharacters reach hadd literally and can
    # neither split into several arguments nor be executed by a shell.
    cmd = ['hadd', outfile] + list(filelist)
    #print(cmd)
    try:
        status = subprocess.call(cmd)
    except OSError as err:
        # Without a shell, a missing or non-executable hadd raises here
        # instead of producing a non-zero return code.
        sys.exit('ERROR: could not run hadd: %s' % err)

    if status != 0:
        sys.exit('ERROR: while executing hadd.')
|
27 |
|
28 |
|
29 |
def hadd(outfile, infiles, chunksize=1000):
    """Merge all ``.root`` files among ``infiles`` into ``outfile``.

    Entries of ``infiles`` whose extension is not ``.root`` are ignored.
    When there are more inputs than ``chunksize``, they are merged in
    batches of ``chunksize`` into temporary files named
    ``<outfile-without-extension>_NNN.root``, which are then merged into
    ``outfile`` and removed.

    Parameters:
        outfile   -- path of the merged file to create.
        infiles   -- iterable of candidate input paths.
        chunksize -- maximum number of files passed to one ``hadd`` call
                     (default 1000).

    Raises ValueError if ``chunksize`` is smaller than 1.  Exits the
    process with an error message if no ``.root`` input is given or if a
    temporary file name is already taken; ``callHadd`` exits on its own
    failures.
    """
    if chunksize < 1:
        raise ValueError('chunksize must be at least 1')

    files = [f for f in infiles if os.path.splitext(f)[1] == '.root']
    nFiles = len(files)
    if nFiles == 0:
        # Otherwise hadd would be started with an empty input list.
        sys.exit('ERROR: no .root input files given.')

    if nFiles <= chunksize:
        # A single batch needs no intermediate file: merge straight into
        # the output instead of writing the data twice.
        callHadd(outfile, files)
    else:
        base = os.path.splitext(outfile)[0]
        tempfiles = []
        try:
            for n, start in enumerate(range(0, nFiles, chunksize)):
                tmpfile = '%s_%03d.root' % (base, n + 1)
                # Refuse to reuse an existing file so the cleanup below
                # can never delete something this run did not create.
                if os.path.exists(tmpfile):
                    sys.exit('ERROR: file  %s  exists already.' % tmpfile)
                # Register before merging so that a partially written
                # file is cleaned up when the merge fails.
                tempfiles.append(tmpfile)
                callHadd(tmpfile, files[start:start + chunksize])

            # join the tempfiles
            callHadd(outfile, tempfiles)
        finally:
            # remove tempfiles, also when a merge step exited with an error
            for f in tempfiles:
                if os.path.exists(f):
                    os.remove(f)

    print('%d files joined in %s.' % (nFiles, outfile))
|
62 |
|
63 |
# ----------------------------------------------------------------------
|
64 |
|
65 |
if __name__ == '__main__':
    # Command line entry point: <script> outfile infile1 [infile2] ...
    # Every validation failure ends the script through sys.exit(message),
    # which prints the message to stderr and returns exit status 1, so
    # calling shells and batch systems can detect the error.

    if len(sys.argv) < 3:
        sys.exit('Usage:  %s  outfile infile1 [infile2] ...' % sys.argv[0])

    outfile = sys.argv[1]

    if os.path.splitext(outfile)[1] != '.root':
        sys.exit('ERROR: outfile  %s must be a root file.' % outfile)

    # Check up front so that no merge work starts before failing.
    if os.path.exists(outfile):
        sys.exit('ERROR: outfile  %s  must not exist.' % outfile)

    hadd(outfile, sys.argv[2:])
|