ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/dhidas/OSUAnalysis/Tools/scripts/remove_duplicates
Revision: 1.1.1.1 (vendor branch)
Committed: Thu Dec 1 16:28:48 2011 UTC (13 years, 5 months ago) by dhidas
Branch: dhidas, MAIN
CVS Tags: START, HEAD
Changes since 1.1: +0 -0 lines
Error occurred while calculating annotation data.
Log Message:
osu copy modified

File Contents

# Content
1 #!/usr/bin/python
2 '''
3 Created on 1 Jun 2010
4
5 @author: kreczko
6
7 Email: kreczko@cern.ch
8 '''
9
10 from optparse import OptionParser
11 import os
12 import copy
13
# Job numbers found more than once; appended to by listContainsDuplicates().
duplicates = list()
# Maps a job number to the list of file names registered for it through
# addDuplicate().
duplicateFiles = dict()
16
def getUniqueFiles(files):
    """Return the file names in *files* with one entry kept per job number.

    When no two names share a job number, *files* itself is returned
    unchanged (the same list object).  Otherwise a new list is returned in
    which every group of names sharing a job number is replaced by the single
    name that sorts last within that group.

    The module-level ``duplicates`` and ``duplicateFiles`` containers are
    cleared on entry and repopulated, so repeated calls do not see data left
    over from an earlier call.
    """
    # Start from a clean slate: both containers are only ever appended to by
    # the helpers, so without this a second call would work on stale entries.
    del duplicates[:]
    duplicateFiles.clear()

    if not listContainsDuplicates(files):
        return files
    findDuplicates(files)

    uniqueFiles = copy.copy(files)
    # .values() works on Python 2 and 3 alike (itervalues() is Python 2 only).
    for values in duplicateFiles.values():
        values.sort()
        # Drop every member of the group by membership rather than calling
        # list.remove() per entry: a group may list the same name more than
        # once, and a second remove() of that name would raise ValueError.
        members = set(values)
        uniqueFiles = [name for name in uniqueFiles if name not in members]
        # NOTE(review): this is plain string ordering of the file names;
        # confirm that "sorts last" is the intended choice of survivor.
        uniqueFiles.append(values[-1])
    return uniqueFiles
29
def listContainsDuplicates(list):
    """Report whether two or more entries of *list* share a job number.

    Each job number that occurs more than once is recorded in the
    module-level ``duplicates`` list, at most once per job number.

    Returns True if this particular *list* contains at least one repeated job
    number, False otherwise.  Exceptions raised by ``extractJobnumber`` for a
    name it cannot parse propagate to the caller.
    """
    # The parameter name shadows the builtin ``list``; it is kept because it
    # is part of the function's signature.
    seen = set()
    found = False
    for item in list:
        jobnumber = extractJobnumber(item)
        if jobnumber not in seen:
            seen.add(jobnumber)
            continue
        found = True
        # Record the job only once, even if it has three or more copies;
        # repeated entries make later stages register files several times.
        if jobnumber not in duplicates:
            duplicates.append(jobnumber)
    # Report on this list only, not on whatever an earlier call left behind
    # in ``duplicates``.
    return found
39
def findDuplicates(files):
    """Register every name in *files* whose job number is a known duplicate.

    A name is passed to ``addDuplicate`` exactly once when its job number
    appears in the module-level ``duplicates`` list, regardless of how many
    times that job number is listed there.
    """
    duplicateJobs = set(duplicates)
    if not duplicateJobs:
        # Nothing to match against, so do not parse any file name at all.
        return
    for file in files:
        # Parse the job number once per file instead of once per comparison.
        job = extractJobnumber(file)
        if job in duplicateJobs:
            addDuplicate(job, file)
45
def extractJobnumber(file):
    """Return the job number embedded in the name *file* as an ``int``.

    The name is cut at underscores and the third field counted from the end
    is converted with ``int``; for example ``'ntuple_12_1_abc.root'`` yields
    ``12``.

    Raises IndexError when the name has fewer than three underscore-separated
    fields and ValueError when the selected field is not an integer.
    """
    # Only the last three fields matter, so split from the right.
    fields = file.rsplit('_', 3)
    return int(fields[-3])
49
def addDuplicate(jobnumber, file):
    """Append *file* to the list kept for *jobnumber* in ``duplicateFiles``.

    The list is created on first use of a job number.
    """
    # setdefault replaces the dict.has_key() check, which no longer exists on
    # Python 3, and does the lookup-or-create in a single step.
    duplicateFiles.setdefault(jobnumber, []).append(file)
54
def removeDuplicates(path, files):
    """Delete from *path* every file in *files* that duplicates a job.

    *files* is sorted in place, reduced to one name per job number with
    ``getUniqueFiles``, and every name that did not survive is deleted from
    the directory *path* via ``remove``.  Three summary counts are printed.
    """
    print('Number of file in path: %d' % len(files))
    files.sort()
    uniqueFiles = getUniqueFiles(files)
    uniqueFiles.sort()
    print('Number of unique files %d' % len(uniqueFiles))
    # Set membership keeps the filter linear in the number of files.
    survivors = set(uniqueFiles)
    filesToRemove = [file for file in files if file not in survivors]
    print('Number of duplicate files: %d' % len(filesToRemove))
    for file in filesToRemove:
        # os.path.join inserts the separator when *path* lacks a trailing
        # one; plain concatenation would build a wrong file name in that case.
        remove(os.path.join(path, file))
64
def remove(file):
    """Print a notice and delete *file* from the file system.

    Any error raised by ``os.remove`` (for example when the file does not
    exist) propagates to the caller.
    """
    # Single-argument print with explicit formatting produces the same output
    # on Python 2 and Python 3.
    print('removing %s' % (file,))
    os.remove(file)
68
if __name__ == "__main__":
    # No options are defined; the parser is used to collect the positional
    # arguments and to provide -h/--help.
    parser = OptionParser(usage='%prog path')
    (options, args) = parser.parse_args()
    if args:
        # The first positional argument is the directory to clean up.
        path = args[0]
        files = os.listdir(path)
        removeDuplicates(path, files)
    else:
        print('File path was not specified. Use script "./remove_duplicates path"')