ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/dhidas/OSUAnalysis/Tools/scripts/remove_duplicates
Revision: 1.1
Committed: Thu Dec 1 16:28:48 2011 UTC (13 years, 5 months ago) by dhidas
Branch point for: dhidas, MAIN
Log Message:
Initial revision

File Contents

# User Rev Content
1 dhidas 1.1 #!/usr/bin/python
2     '''
3     Created on 1 Jun 2010
4    
5     @author: kreczko
6    
7     Email: kreczko@cern.ch
8     '''
9    
10     from optparse import OptionParser
11     import os
12     import copy
13    
# Module-level bookkeeping shared by the helper functions below.
# Job numbers that listContainsDuplicates() encountered more than once.
duplicates = list()
# Filled by addDuplicate(): job number -> list of file names for that job.
duplicateFiles = dict()
16    
def getUniqueFiles(files):
    '''
    Return the entries of files with duplicated job numbers collapsed.

    For every job number that occurs more than once, all files carrying
    that job number are dropped and only the one that sorts last (by plain
    string comparison) is kept; it is appended at the end of the result.

    If no job number is duplicated, the very same list object that was
    passed in is returned. Otherwise a new list is returned and files is
    left unmodified.

    Side effect: the module-level duplicates / duplicateFiles containers
    are reset and then refilled for this call.
    '''
    # Start from a clean slate. Without this, state left over from an
    # earlier call would be mixed into this one and files would be
    # registered (and later removed) more than once.
    del duplicates[:]
    duplicateFiles.clear()

    if not listContainsDuplicates(files):
        return files
    findDuplicates(files)

    superseded = set()   # every file that shares a job number with another
    keepers = []         # the single survivor chosen for each such job
    for values in duplicateFiles.values():
        superseded.update(values)
        # max() picks the same element that sorting and taking [-1] would.
        keepers.append(max(values))

    # Filtering against a set tolerates a file being registered several
    # times, which made list.remove() raise ValueError before.
    uniqueFiles = [name for name in files if name not in superseded]
    uniqueFiles.extend(keepers)
    return uniqueFiles
29    
def listContainsDuplicates(list):
    '''
    Record every job number that occurs more than once in list.

    Each item is passed through extractJobnumber(); a job number seen for
    the second (or later) time is stored in the module-level duplicates
    list, at most once.

    Returns True when duplicates is non-empty after the scan. Note that
    duplicates is module state, so entries recorded by earlier calls also
    count.

    The parameter name shadows the builtin but is kept so that existing
    callers are unaffected.
    '''
    seen = set()  # set gives O(1) membership tests
    for item in list:
        jobnumber = extractJobnumber(item)
        if jobnumber not in seen:
            seen.add(jobnumber)
        elif jobnumber not in duplicates:
            # Record each duplicated job only once; repeated entries made
            # findDuplicates() register the same file several times.
            duplicates.append(jobnumber)
    return bool(duplicates)
39    
def findDuplicates(files):
    '''
    Register every file whose job number is listed in duplicates.

    Each matching file is handed to addDuplicate() exactly once, together
    with its job number. Nothing is returned; the result is accumulated by
    addDuplicate().
    '''
    # A set collapses repeated entries in duplicates, so a file can no
    # longer be registered more than once for the same job.
    wanted = set(duplicates)
    if not wanted:
        # Nothing to look for: do not even parse the file names.
        return
    for name in files:
        # Parse the job number once per file rather than once per
        # (file, duplicate) pair.
        jobnumber = extractJobnumber(name)
        if jobnumber in wanted:
            addDuplicate(jobnumber, name)
45    
def extractJobnumber(file):
    '''
    Return the job number encoded in a file name, as an int.

    The name is split on underscores and the third field counted from the
    end is converted with int(). Raises IndexError when the name has fewer
    than three underscore-separated fields and ValueError when that field
    is not an integer.
    '''
    fields = file.split('_')
    return int(fields[-3])
49    
def addDuplicate(jobnumber, file):
    '''
    Append file to the list stored under jobnumber in duplicateFiles.

    The list is created on first use of a job number.
    '''
    # setdefault replaces the has_key() check, which no longer exists in
    # Python 3, and does the lookup and the insertion in one step.
    duplicateFiles.setdefault(jobnumber, []).append(file)
54    
def removeDuplicates(path, files):
    '''
    Delete from the directory path every file in files that is a duplicate.

    files is sorted in place, reduced with getUniqueFiles(), and every
    entry that did not survive is deleted through remove(). Progress
    counts are printed along the way.

    path  -- directory that contains the files
    files -- list of file names relative to path (sorted in place)
    '''
    print('Number of file in path: %d' % len(files))
    files.sort()
    uniqueFiles = getUniqueFiles(files)
    uniqueFiles.sort()
    print('Number of unique files %d' % len(uniqueFiles))

    # Set lookup keeps the filter linear in the number of files.
    survivors = set(uniqueFiles)
    filesToRemove = [name for name in files if name not in survivors]
    print('Number of duplicate files: %d' % len(filesToRemove))

    for name in filesToRemove:
        # os.path.join inserts the separator when path lacks a trailing
        # one; plain concatenation produced a wrong path in that case.
        remove(os.path.join(path, name))
64    
def remove(file):
    '''
    Announce and delete a single file.

    Prints 'removing <file>' and then calls os.remove(); any OSError
    raised by os.remove() propagates to the caller.
    '''
    # Single-argument print with %-formatting yields the same output under
    # Python 2 and Python 3.
    print('removing %s' % file)
    os.remove(file)
68    
if __name__ == "__main__":
    # Script entry point: the first positional argument is the directory
    # to clean. The usage string makes --help mention that argument.
    parser = OptionParser(usage='%prog path')
    (options, args) = parser.parse_args()
    if args:
        path = args[0]
        files = os.listdir(path)
        removeDuplicates(path, files)
    else:
        print('File path was not specified. Use script "./remove_duplicates path"')