ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/dhidas/OSUAnalysis/Tools/scripts/fileInfo.py
Revision: 1.1.1.1 (vendor branch)
Committed: Thu Dec 1 16:28:48 2011 UTC (13 years, 5 months ago) by dhidas
Content type: text/x-python
Branch: dhidas, MAIN
CVS Tags: START, HEAD
Changes since 1.1: +0 -0 lines
Log Message:
osu copy modified

File Contents

# Content
1 #!/usr/bin/python
2 '''
3 Created on 1 Jun 2010
4
5 @author: kreczko
6
7 Email: kreczko@cern.ch
8 '''
9
10 from optparse import OptionParser
11 import os
12 import copy
13 import glob
14
# Job numbers that occur more than once in the scanned file list.
# Appended to by listContainsDuplicates() and read by findDuplicates().
duplicates = []
# Maps a job number to the list of file names registered for it.
# Filled by addDuplicate() and read by getUniqueFiles().
duplicateFiles = {}
17
def getROOTFiles(path):
    """Return the ``*.root`` entries found directly inside ``path``.

    The search pattern is ``path`` followed by ``/*.root``.  The matches are
    returned exactly as ``glob.glob`` produces them; no sorting is applied.
    """
    return glob.glob(path + "/*.root")
22
def getUniqueFiles(files):
    """Return ``files`` reduced to a single entry per duplicated job number.

    If ``listContainsDuplicates`` reports no duplicates, the input list itself
    is returned.  Otherwise ``findDuplicates`` fills the module-level
    ``duplicateFiles`` mapping, a shallow copy of ``files`` is made, every
    file registered in that mapping is removed from the copy, and for each
    job number the last file name in sorted order is appended back.
    """
    if not listContainsDuplicates(files):
        return files
    findDuplicates(files)

    remaining = files[:]
    for group in duplicateFiles.values():
        for name in group:
            remaining.remove(name)
        # Sorted in place, so the list stored in duplicateFiles is reordered
        # too; the entry that sorts last is the one kept.
        group.sort()
        remaining.append(group[-1])
    return remaining
35
def listContainsDuplicates(list):
    """Tell whether any job number occurs more than once in ``list``.

    The job number of every item is obtained with ``extractJobnumber``.  A
    number seen for the second (or any later) time is recorded in the
    module-level ``duplicates`` list.  Each number is recorded at most once,
    however many files share it, so a job with three or more files does not
    produce repeated entries.

    ``duplicates`` is not cleared here: entries left by an earlier call are
    kept and count toward the result.

    Returns True if ``duplicates`` is non-empty after the scan.
    """
    # The parameter name shadows the builtin; it is kept so that existing
    # keyword callers continue to work.
    seen = set()
    for item in list:
        jobnumber = extractJobnumber(item)
        if jobnumber not in seen:
            seen.add(jobnumber)
        elif jobnumber not in duplicates:
            # First repeat of this job number: flag it exactly once.
            duplicates.append(jobnumber)
    return len(duplicates) > 0
45
def findDuplicates(files):
    """Register every file whose job number is listed in ``duplicates``.

    Each file name is parsed once with ``extractJobnumber``; when its job
    number is flagged, the file is handed to ``addDuplicate`` exactly once,
    even if ``duplicates`` happens to contain that number several times.
    """
    flagged = set(duplicates)
    if not flagged:
        # Nothing is flagged, so no file name needs to be parsed at all.
        return
    for name in files:
        job = extractJobnumber(name)
        if job in flagged:
            addDuplicate(job, name)
51
def extractJobnumber(file):
    """Return the job number encoded in a file name, as an int.

    The name is split on ``_`` and the third field counted from the end is
    converted, e.g. ``sample_12_3_abc.root`` gives ``12``.  A name with fewer
    than three fields raises IndexError; a non-numeric field raises
    ValueError.
    """
    # Only the last three separators matter for picking the field.
    fields = file.rsplit('_', 3)
    return int(fields[-3])
55
def addDuplicate(jobnumber, file):
    """Append ``file`` to the list kept for ``jobnumber`` in ``duplicateFiles``.

    The list is created on first use of a job number.
    """
    duplicateFiles.setdefault(jobnumber, []).append(file)
60
def getDuplicateFiles(allFiles, uniqueFiles):
    """Return the entries of ``allFiles`` that are absent from ``uniqueFiles``.

    The result is a new list that keeps the order of ``allFiles``.
    """
    leftovers = []
    for candidate in allFiles:
        if candidate in uniqueFiles:
            continue
        leftovers.append(candidate)
    return leftovers
69
if __name__ == "__main__":
    # Script entry point: takes a directory as first positional argument,
    # lists its entries, works out which ones are superseded duplicates of
    # the same job number, and prints a summary plus the files to remove.
    parser = OptionParser()
    (options, args) = parser.parse_args()
    if len(args) > 0:
        path = args[0]
        files = os.listdir(path)
        files.sort()
        uniqueFiles = getUniqueFiles(files)
        uniqueFiles.sort()
        # Kept under its own name so the module-level duplicateFiles mapping
        # is not replaced by this list.
        filesToRemove = getDuplicateFiles(files, uniqueFiles)
        # Single-argument print(...) gives the same output on Python 2 and 3.
        print('Number of file in path: %d' % len(files))
        print('Number of unique files %d' % len(uniqueFiles))
        print('Number of duplicate files: %d' % len(filesToRemove))
        if len(filesToRemove) > 0:
            print('Files to remove:')
        for name in filesToRemove:
            # os.path.join inserts the separator when the given path does
            # not already end with one.
            print(os.path.join(path, name))
    else:
        print('File path was not specified. Use script "./remove_duplicates path"')