ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/findSamples.py
Revision: 1.5
Committed: Fri Jul 30 18:41:11 2010 UTC (14 years, 9 months ago) by paus
Content type: text/x-python
Branch: MAIN
CVS Tags: Mit_017pre3, Mit_017pre2, Mit_017pre1, Mit_016, Mit_015b, Mit_015a, Mit_015, Mit_014e, Mit_014d, Mit_014c
Changes since 1.4: +50 -73 lines
Log Message:
Cleaned up and updated version.

File Contents

# User Rev Content
1 paus 1.2 #!/usr/bin/env python
2     #---------------------------------------------------------------------------------------------------
3     # Script to identify all samples which are kept in our database.
4     #
5     # Author: C.Paus (September 23, 2008)
6     #---------------------------------------------------------------------------------------------------
7     import os,sys,getopt,re,string
8    
def findStartedDatasets(path):
    """Return the names of the samples that already exist under `path`.

    Runs the external `list` command on `path` and takes the second
    whitespace-separated column of every output line as the sample name;
    the first column is treated as a size and ignored.
    NOTE(review): `list` is not defined in this file -- the "<size> <name>"
    layout is inferred from how the two columns are used here; confirm.

    path    -- storage directory handed to `list`
    returns -- list of sample names, in the order they were printed
    """
    print(" Collecting information over started samples")
    datasetList = []

    cmd = 'list ' + path
    listing = os.popen(cmd)                       # run command
    try:
        for line in listing.readlines():
            # split() without argument copes with repeated blanks and with a
            # missing trailing newline, which split(" ") / line[:-1] did not
            fields = line.split()
            if len(fields) < 2:
                # blank or incomplete line: there is no name column to read
                continue
            datasetList.append(fields[1])
    finally:
        # always release the pipe, even if reading fails
        listing.close()

    return datasetList
25    
def findOngoingDatasets(path):
    """Return the dataset names of all crab tasks found in the current directory.

    Greps the `user_remote_dir` line out of every
    crab_[0-9]_[0-9]*_[0-9]*/share/crab.cfg below the current working directory
    and uses the last "/"-separated component of that line as dataset name.
    When the last component is itself a crab task directory (it contains
    'crab_0'), the component before it is used instead.

    path    -- not used; kept so all find*Datasets functions share one signature
    returns -- list of dataset names, one per matching crab.cfg line
    """
    print(" Collecting information over ongoing samples")
    datasetList = []

    cmd = 'cat crab_[0-9]_[0-9]*_[0-9]*/share/crab.cfg |grep ^user_remote_dir'
    pipe = os.popen(cmd)                          # run command
    try:
        for line in pipe.readlines():
            # remove only the newline; line[:-1] would eat a real character
            # if the last line came without one
            parts = line.rstrip('\n').split("/")
            dataset = parts[-1]
            # only step back one component if there is one to step back to
            if re.search('crab_0',dataset) and len(parts) > 1:
                dataset = parts[-2]
            datasetList.append(dataset)
    finally:
        # always release the pipe, even if reading fails
        pipe.close()

    return datasetList
43    
def findCompletedDatasets(path):
    """Return the dataset names listed in the 'Completed' bookkeeping file.

    The file is $MIT_PROD_DIR/<mitCfg>/<version>/Completed, where mitCfg and
    version are the module-level settings of this script. Every line of the
    file is one entry. A missing or unreadable file yields an empty list,
    which is what the former 'cat ... 2> /dev/null' produced as well.

    path    -- not used; kept so all find*Datasets functions share one signature
    returns -- list with one entry per line of the file
    raises  -- KeyError if MIT_PROD_DIR is not set in the environment
    """
    print(" Collecting information over completed samples")

    completedFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/Completed'
    try:
        handle = open(completedFile)
    except IOError:
        # no bookkeeping file (yet): nothing has been completed
        return []

    try:
        # strip only the newline: line[:-1] cut the last character of the last
        # dataset name whenever the file did not end with a newline
        return [line.rstrip('\n') for line in handle.readlines()]
    finally:
        handle.close()
58    
def inList(file,list):
    """Return True if `file` equals one of the entries of `list`, else False.

    The parameter names shadow builtins; they are kept because they are part
    of the existing interface.
    """
    return file in list
64    
def cleanupCompletedList(ongoingDsetList,completedDsetList):
    """Remove every ongoing dataset from the 'Completed' bookkeeping file.

    A dataset that is being processed again must no longer be flagged as
    completed. The file rewritten is $MIT_PROD_DIR/<mitCfg>/<version>/Completed,
    the same file findCompletedDatasets() reads (mitCfg and version are the
    module-level settings of this script). Entries are compared as exact
    strings, so characters like '.' in a dataset name cannot remove other
    entries. Neither of the two lists passed in is modified.

    ongoingDsetList   -- dataset names currently being processed
    completedDsetList -- dataset names currently listed as completed
    raises            -- KeyError if MIT_PROD_DIR is not set and there is
                         something to remove
    """
    print(" Update completed list with ongoing list")

    stale = [dataset for dataset in ongoingDsetList if dataset in completedDsetList]
    if not stale:
        return
    for dataset in stale:
        print(' -> removing from completed: ' + dataset)

    completedFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/Completed'
    try:
        handle = open(completedFile)
    except IOError:
        # nothing readable to clean up; do not create or truncate the file
        print(' WARNING - cannot read completed list: ' + completedFile)
        return
    try:
        lines = handle.readlines()
    finally:
        handle.close()

    kept = [line for line in lines if line.rstrip('\n') not in stale]

    # write next to the target and rename, so an interrupted run cannot leave
    # a half-written Completed file behind
    backupFile = completedFile + '.bak'
    handle = open(backupFile,'w')
    try:
        handle.writelines(kept)
    finally:
        handle.close()
    os.rename(backupFile,completedFile)
74    
#===================================================================================================
# Main starts here
#===================================================================================================
# Flow of the script:
#   1. read the command line options
#   2. locate the crab and cmssw configuration of the requested production
#   3. collect the started, ongoing and completed samples
#   4. walk through the Productions file and, per sample, print (and with --exe execute)
#      the submit.py and/or downloadSample.py command that applies
#
# Define string to explain usage of the script
usage  = "\nUsage: findSamples.py --mitCfg=<name>\n"
usage += " --version=<version>\n"
usage += " --cmssw=<name>\n"
usage += " --pattern=<name>\n"
usage += " --download=<int: -1,0,1>\n"
usage += " --exe\n"
usage += " --complete\n"
usage += " --noInfo\n"
usage += " --forceCopy\n"
usage += " --debug\n"
usage += " --help\n\n"

# Define the valid options which can be specified and check out the command line
valid = ['mitCfg=','version=','cmssw=','pattern=','download=', \
         'help','exe','complete','noInfo','forceCopy','debug']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError:
    # sys.exc_info() instead of 'except ..., ex' is accepted by old and new interpreters alike
    ex = sys.exc_info()[1]
    print(usage)
    print(str(ex))
    sys.exit(1)

# --------------------------------------------------------------------------------------------------
# Get all parameters for the production
# --------------------------------------------------------------------------------------------------
# Set defaults for each option
mitCfg    = 'filefi'
version   = '014'
cmssw     = ''
pattern   = ''
cmsswCfg  = 'cmssw.cfg'
exe       = 0
complete  = 0
noInfo    = False
download  = -1               # -1: no downloads, 1: no submissions, other values: both are considered
forceCopy = False
debug     = False

# Read new values from the command line
for opt, arg in opts:
    if opt == "--help":
        print(usage)
        sys.exit(0)
    if opt == "--mitCfg":
        mitCfg = arg
    if opt == "--version":
        version = arg
    if opt == "--cmssw":
        cmssw = arg
    if opt == "--pattern":
        pattern = arg
    if opt == "--exe":
        exe = 1
    if opt == "--download":
        # download is compared with the integers 1 and -1 below; kept as a string both
        # comparisons were always true and the option had no selective effect
        try:
            download = int(arg)
        except ValueError:
            print(usage)
            print("option --download requires an integer: " + arg)
            sys.exit(1)
    if opt == "--complete":
        complete = 1
    if opt == "--noInfo":
        noInfo = True
    if opt == "--forceCopy":
        forceCopy = True
    if opt == "--debug":
        debug = True

# Read parameters needed
configDir = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version
crabFile  = configDir + '/' + 'crab.cfg'
if not os.path.exists(crabFile):
    raise RuntimeError("Crab file not found: %s" % crabFile)
cmsswFile = configDir + '/' + cmsswCfg
if not os.path.exists(cmsswFile):
    # no cmssw.cfg available, fall back to the python style configuration
    cmsswCfg  = 'cmssw.py'
    cmsswFile = configDir + '/' + cmsswCfg
    if not os.path.exists(cmsswFile):
        raise RuntimeError(" XXXX ERROR no valid configuration found XXXX")

# Find all started samples
path = '/pnfs/cmsaf.mit.edu/t2bat/cms/store/user/paus/' + mitCfg + '/' + version
cmd  = 'grep ^storage_element ' + crabFile + '| grep cern.ch'
pipe = os.popen(cmd)
cernLines = pipe.readlines()
pipe.close()
if cernLines:
    # the crab configuration names a cern.ch storage element: use the castor location instead
    path = '/castor/cern.ch/user/p/paus/' + mitCfg + '/' + version

startedDsetList   = findStartedDatasets(path)
ongoingDsetList   = findOngoingDatasets(path)
completedDsetList = findCompletedDatasets(path)
cleanupCompletedList(ongoingDsetList,completedDsetList)
# read again, the cleanup may just have removed entries
completedDsetList = findCompletedDatasets(path)

# Resolve the other mitCfg parameters from the configuration file
cmd = 'cat '+ os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'Productions'
if cmssw != '':
    cmd = cmd + '.' + cmssw

print('')
join       = 0               # 1 while a line continued with a trailing backslash is being assembled
mitDataset = ""
fullLine   = ""
bSlash     = "\\"
pipe = os.popen(cmd)                              # run command
productionLines = pipe.readlines()
pipe.close()
for line in productionLines:
    line = line.rstrip('\n')
    # get rid of empty lines
    if line == '':
        continue
    # get rid of commented lines and read steering parameters
    if line[0] == '#':
        names = line.split()                      # splitting every blank
        # names[3] is read, so at least four fields are required
        if len(names) > 3 and names[1] == 'crontab' and int(names[3]) != 1:
            print('No crontab has been set.... (LINE: ' + line + ')')
            sys.exit(0)
        continue

    # join lines
    if join == 1:
        fullLine += line
    else:
        fullLine = line

    # determine if finished or more is coming
    if fullLine.endswith(bSlash):
        join     = 1
        fullLine = fullLine[:-1]
        continue
    join = 0

    # the line is complete: normalize the white space and split it into its columns
    names    = fullLine.split()                   # splitting every blank
    fullLine = " ".join(names)
    if debug:
        print("FullLine: " + fullLine)
    if len(names) < 5:
        # five columns are read below; skip the line instead of dying with an IndexError
        print(' WARNING - incomplete line skipped: ' + fullLine)
        continue
    cmsDataset = names[0]
    mitDataset = names[1]                         # this is the equivalent MIT name of the dataset
    nevents    = int(names[2])                    # number of events to be used in the production
    procStatus = names[3]                         # read for completeness, not used below
    local      = names[4]                         # "-" means no download for this sample

    if pattern != '' and not re.search(pattern,mitDataset):
        continue

    # make sure we want to consider submission
    if download != 1:
        cmd = 'submit.py --mitDataset=' + mitDataset + ' --mitCfg=' + mitCfg + \
              ' --version=' + version + ' --noTestJob'
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw

        # check for errors (to be done)

        # check for the logical combinations
        if not inList(mitDataset,startedDsetList):
            # never started: submit from scratch
            print(' submitting: ' + cmd)
            if exe == 1:
                os.system(cmd)

        elif inList(mitDataset,ongoingDsetList):
            print(' handled by jobSitter -- ' + mitDataset)

        elif inList(mitDataset,completedDsetList):
            if not noInfo:
                print(' don: ' + mitDataset)
        else:
            # started, but neither ongoing nor completed: needs to be completed
            if complete == 1:
                cmd = cmd + ' --complete'
            print(' completing: ' + cmd)
            if exe == 1:
                os.system(cmd)

    # test download request
    if local != "-" and download != -1:
        cmd = 'downloadSample.py --cmsDataset=' + cmsDataset + ' --mitCfg=' + mitCfg + \
              " --version=" + version
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw
        if forceCopy:
            cmd += ' --forceCopy'
        print(" " + cmd)
        if exe == 1:
            os.system(cmd)

if mitDataset == "":
    print("ERROR - dataset not defined.")
    sys.exit(0)

sys.exit(0)