1 |
#!/usr/bin/env python
|
2 |
|
3 |
import os
|
4 |
import sys
|
5 |
from fileInfo import *
|
6 |
|
7 |
|
8 |
def groupFilesToSize(files, finalSize = 1024*1024*1024*2):
    """Partition files, in order, into groups of bounded total size on disk.

    Files are taken in the order given and added to the current group until
    adding the next one would push the group's summed size above finalSize;
    that file then opens a new group.

    Args:
        files: iterable of file paths; each one is passed to os.path.getsize.
        finalSize: maximum summed size of one group, in bytes. The default
            is 2 GiB (1024**3 * 2).

    Returns:
        A list of non-empty lists of paths, preserving the input order. A
        single file that is larger than finalSize ends up in a group of its
        own. An empty input yields an empty list.

    Raises:
        OSError: propagated from os.path.getsize when a path does not exist
            or is inaccessible.
    """
    groups = []
    currentGroup = []
    currentSize = 0
    for path in files:
        size = os.path.getsize(path)
        # Only close the running group if it already holds something;
        # otherwise an oversized file would leave an empty group behind.
        if currentGroup and (currentSize + size) > finalSize:
            groups.append(currentGroup)
            currentGroup = []
            currentSize = 0
        currentGroup.append(path)
        currentSize += size
    if currentGroup:
        groups.append(currentGroup)
    return groups
|
21 |
|
22 |
def fuseFiles(groupedFiles):
    """Merge every group of ROOT files into one output file using hadd.

    For the group at 1-based position N the command executed is
    ``hadd -f7 <process>_merged_<N>.root <input> [<input> ...]`` where
    <process> is getProcess() of the group's first file. Because
    getProcess() drops the directory part, the output name is relative to
    the current working directory.

    Args:
        groupedFiles: iterable of lists of input file paths, one list per
            output file. Empty lists are skipped (their position still
            counts towards N).

    Returns:
        None.

    Raises:
        OSError: if the hadd executable cannot be started.
    """
    # Function-scope import so the block does not depend on a file-level
    # import that may not be present.
    import subprocess

    for groupNumber, inputs in enumerate(groupedFiles, 1):
        if not inputs:
            # Nothing to merge, and there is no first file to name the output.
            continue
        outputFile = '%s_merged_%d.root' % (getProcess(inputs[0]), groupNumber)
        # NOTE(review): -f7 is presumably hadd's "force overwrite, compression
        # level 7" switch; confirm against the installed ROOT version.
        command = ['hadd', '-f7', outputFile] + list(inputs)
        print('Executing:')
        print(' '.join(command))
        # An argument list (no shell) keeps paths containing spaces or shell
        # metacharacters intact.
        status = subprocess.call(command)
        if status != 0:
            sys.stderr.write('hadd exited with status %d while writing %s\n'
                             % (status, outputFile))
|
36 |
|
37 |
def getProcess(filepath):
    """Derive the process name from the file name of a path.

    The directory part is dropped and the file name is split on '_'.
    With five tokens or fewer the result is the first two tokens joined
    by '_'; with more than five it is every token except the last three,
    joined by '_'.

    Args:
        filepath: path (or bare file name) of an input file.

    Returns:
        The process name as a string. A file name that contains no '_' is
        returned unchanged instead of raising IndexError.
    """
    tokens = os.path.basename(filepath).split('_')
    if len(tokens) <= 5:
        # Slicing tolerates a single-token name, unlike tokens[1].
        return '_'.join(tokens[:2])
    # NOTE(review): the last three tokens are presumably job/bookkeeping
    # suffixes added by the production system; confirm the naming scheme.
    return '_'.join(tokens[:-3])
|
48 |
|
49 |
|
50 |
|
51 |
if __name__ == "__main__":
    # Usage: <script> <folder>
    # Groups the unique ROOT files found for <folder> by size and merges
    # each group into one output file.
    if len(sys.argv) != 2:
        print("Please specify a folder to merge files in.")
        # Non-zero status so calling scripts can detect the usage error.
        sys.exit(1)

    path = sys.argv[1]
    # getROOTFiles / getUniqueFiles are not defined in this file; presumably
    # they come from the fileInfo star import.
    files = getROOTFiles(path)
    if not files:
        # Without this guard getProcess(files[0]) below raises IndexError.
        print('No ROOT files found in %s' % path)
        sys.exit(1)
    uniqueFiles = getUniqueFiles(files)
    groupedFiles = groupFilesToSize(uniqueFiles)

    # Single-argument print calls give the same output on Python 2 and 3.
    print('Total number of files: %d' % len(files))
    print('Total number of unique files: %d' % len(uniqueFiles))
    print('Process recognised: %s' % getProcess(files[0]))
    # len(groupedFiles) is the number of groups, i.e. of output files.
    print('Number of output files: %d' % len(groupedFiles))
    fuseFiles(groupedFiles)
|