ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/dhidas/OSUAnalysis/Tools/scripts/fileInfo.py
Revision: 1.1.1.1 (vendor branch)
Committed: Thu Dec 1 16:28:48 2011 UTC (13 years, 5 months ago) by dhidas
Content type: text/x-python
Branch: dhidas, MAIN
CVS Tags: START, HEAD
Changes since 1.1: +0 -0 lines
Log Message:
osu copy modified

File Contents

# User Rev Content
1 dhidas 1.1 #!/usr/bin/python
2     '''
3     Created on 1 Jun 2010
4    
5     @author: kreczko
6    
7     Email: kreczko@cern.ch
8     '''
9    
10     from optparse import OptionParser
11     import os
12     import copy
13     import glob
14    
# Job numbers seen more than once in a scanned file list
# (filled by listContainsDuplicates; never cleared between calls).
duplicates = []
# Maps a duplicated job number -> list of file names carrying it
# (filled by addDuplicate).
duplicateFiles = {}
17    
def getROOTFiles(path):
    '''Return the ROOT files (*.root) found directly inside *path*.'''
    pattern = path + "/*.root"
    return glob.glob(pattern)
22    
def getUniqueFiles(files):
    '''Return *files* with each group of duplicate job numbers reduced
    to a single entry (the lexicographically last file of the group).

    Relies on the module-level `duplicates`/`duplicateFiles` state that
    listContainsDuplicates/findDuplicates populate as a side effect.
    '''
    # Guard clause: nothing to do when no job number repeats.
    if not listContainsDuplicates(files):
        return files
    findDuplicates(files)
    uniqueFiles = copy.copy(files)
    # .values() instead of the Python-2-only .itervalues() so the
    # function also runs under Python 3 (identical iteration either way).
    for values in duplicateFiles.values():
        # Drop every file of the duplicate group ...
        for value in values:
            uniqueFiles.remove(value)
        # ... then keep only the lexicographically last one.
        values.sort()
        uniqueFiles.append(values[-1])
    return uniqueFiles
35    
def listContainsDuplicates(list):
    '''Record every repeated job number of *list* into the module-level
    `duplicates` and return True if any duplicates have been seen.

    NOTE(review): `duplicates` is never cleared, so repeated calls
    accumulate job numbers across file lists — confirm callers only
    invoke this once per run.
    '''
    # A set gives O(1) membership tests; the original list scan was
    # O(n) per item. Recorded order in `duplicates` is unchanged.
    seen = set()
    for item in list:
        jobnumber = extractJobnumber(item)
        if jobnumber in seen:
            duplicates.append(jobnumber)
        else:
            seen.add(jobnumber)
    return len(duplicates) > 0
45    
def findDuplicates(files):
    '''Group every file whose job number was flagged as duplicated into
    the module-level `duplicateFiles` map.

    Bug fix: the original iterated over `duplicates` for each file and
    called addDuplicate once per recorded occurrence, so a job number
    appearing k (>2) times registered each of its files k-1 times;
    getUniqueFiles then crashed on the second list.remove of the same
    path. A single membership test records each file exactly once.
    '''
    for file in files:
        # Hoisted: extractJobnumber is pure, no need to call it per
        # entry of `duplicates` as the original did.
        jobnumber = extractJobnumber(file)
        if jobnumber in duplicates:
            addDuplicate(jobnumber, file)
51    
def extractJobnumber(file):
    '''Return the job number encoded as the third-from-last
    underscore-separated token of *file*.'''
    return int(file.split('_')[-3])
55    
def addDuplicate(jobnumber, file):
    '''Append *file* to the `duplicateFiles` bucket for *jobnumber*,
    creating the bucket on first use.'''
    # dict.has_key was removed in Python 3; setdefault behaves
    # identically here and also works on Python 2.
    duplicateFiles.setdefault(jobnumber, []).append(file)
60    
def getDuplicateFiles(allFiles, uniqueFiles):
    '''Return the entries of *allFiles* that are absent from
    *uniqueFiles*, preserving their order in *allFiles*.'''
    # Set lookup is O(1) versus the original O(len(uniqueFiles)) list
    # scan per file; results are identical for these string entries.
    keep = set(uniqueFiles)
    return [file for file in allFiles if file not in keep]
69    
# Command-line entry point: scan a directory for files whose job number
# (third-from-last '_'-separated token) occurs more than once, and list
# the redundant copies that can be removed.
if __name__ == "__main__":
    parser = OptionParser()
    (options, args) = parser.parse_args()
    if len(args) > 0:
        path = args[0]
        # NOTE(review): unlike getROOTFiles, this takes every directory
        # entry, not only *.root files — confirm that is intended.
        files = os.listdir(path)
        files.sort()
        uniqueFiles = getUniqueFiles(files)
        uniqueFiles.sort()
        # Shadows the module-level `duplicateFiles` dict with the list
        # of removable files returned by getDuplicateFiles.
        duplicateFiles = getDuplicateFiles(files, uniqueFiles)
        print 'Number of file in path:', len(files)
        print 'Number of unique files', len(uniqueFiles)
        print 'Number of duplicate files:', len(duplicateFiles)
        if len(duplicateFiles) > 0:
            print 'Files to remove:'
            for file in duplicateFiles:
                # NOTE(review): no separator is inserted between path
                # and file, so output is wrong unless `path` ends in '/'.
                print path + file
    else:
        print 'File path was not specified. Use script "./remove_duplicates path"'