1 |
dhidas |
1.1 |
#!/usr/bin/env python
|
2 |
|
|
|
3 |
|
|
import os
|
4 |
|
|
import sys
|
5 |
|
|
from fileInfo import *
|
6 |
|
|
|
7 |
|
|
|
8 |
|
|
def groupFilesToSize(files, finalSize=1024 * 1024 * 1024 * 2):
    """Partition *files* into consecutive groups of bounded total size.

    Files are taken in the order given and appended to the current group
    until adding the next one would push the group's combined on-disk size
    above ``finalSize``; at that point a new group is started.

    Args:
        files: Iterable of paths to existing files. Each one is sized with
            ``os.path.getsize``.
        finalSize: Maximum combined size of one group, in bytes. The
            default is 2 GiB.

    Returns:
        A list of lists of paths, in input order. A single file that is
        larger than ``finalSize`` ends up in a group of its own. For empty
        input the result is ``[[]]`` (one empty group).

    Raises:
        OSError: Propagated from ``os.path.getsize`` when a path does not
            exist or cannot be inspected.
    """
    groups = [[]]
    groupSize = 0
    for path in files:
        size = os.path.getsize(path)
        # Only open a new group when the current one already holds files;
        # otherwise an oversized first file would leave an empty group
        # behind at the front of the result.
        if groups[-1] and (groupSize + size) > finalSize:
            groups.append([])
            groupSize = 0
        groupSize += size
        groups[-1].append(path)
    return groups
|
21 |
|
|
|
22 |
|
|
def fuseFiles(groupedFiles):
    """Merge each group of input files into one output file with ``hadd``.

    For every non-empty group the output name is
    ``<process>_merged_<n>.root``, where ``<process>`` is the result of
    ``getProcess`` applied to the group's first file and ``<n>`` counts the
    merged groups starting at 1. The output name carries no directory
    component. The command is printed before it is executed.

    Args:
        groupedFiles: Iterable of lists of input file paths. Empty groups
            are skipped and do not consume an output number.

    Returns:
        None. The exit status of ``hadd`` is not inspected.
    """
    # Local import so this function does not rely on a file-level import.
    import subprocess

    group = 1
    for files in groupedFiles:
        if not files:
            # Nothing to merge; files[0] below would raise IndexError.
            continue
        outputFile = '%s_merged_%d.root' % (getProcess(files[0]), group)
        # Pass an argument list instead of a shell string so that paths
        # containing spaces or shell metacharacters reach hadd unchanged.
        arguments = ['hadd', '-f7', outputFile] + list(files)
        print('Executing:')
        print(' '.join(arguments))
        subprocess.call(arguments)
        group += 1
|
36 |
|
|
|
37 |
|
|
def getProcess(filepath):
    """Derive a process name from the file-name part of *filepath*.

    The file name (everything after the last ``/``) is split on ``_``:

    * fewer than two tokens: the name cannot be parsed and ``'default'``
      is returned;
    * two to five tokens: the first two tokens joined by ``_``;
    * six or more tokens: all tokens except the last three, joined by
      ``_``.

    Args:
        filepath: Path or bare file name, using ``/`` as separator.

    Returns:
        The process name as a string.
    """
    tokens = filepath.split('/')[-1].split('_')
    if len(tokens) < 2:
        # No underscore in the name: tokens[1] does not exist, so fall
        # back to the generic name instead of raising IndexError.
        return 'default'
    if len(tokens) <= 5:
        return tokens[0] + '_' + tokens[1]
    return '_'.join(tokens[:-3])
|
48 |
|
|
|
49 |
|
|
|
50 |
|
|
|
51 |
|
|
if __name__ == "__main__":
    # Script entry point: expects exactly one argument, the folder whose
    # ROOT files should be merged.
    if len(sys.argv) != 2:
        print("Please specify a folder to merge files in.")
        # Non-zero status so calling scripts can detect the usage error.
        sys.exit(1)

    path = sys.argv[1]
    files = getROOTFiles(path)
    if len(files) == 0:
        # files[0] is needed below; stop early instead of raising IndexError.
        print('No files found in %s' % path)
        sys.exit(1)

    uniqueFiles = getUniqueFiles(files)
    groupedFiles = groupFilesToSize(uniqueFiles)

    print('Total number of files: %d' % len(files))
    print('Total number of unique files: %d' % len(uniqueFiles))
    print('Process recognised: %s' % getProcess(files[0]))
    # len(groupedFiles) is the number of groups, i.e. of merged outputs.
    print('Number of output files: %d' % len(groupedFiles))
    fuseFiles(groupedFiles)
|