ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/findSamples.py
Revision: 1.4
Committed: Thu Jul 8 02:47:24 2010 UTC (14 years, 10 months ago) by paus
Content type: text/x-python
Branch: MAIN
Changes since 1.3: +9 -1 lines
Log Message:
Add pattern.

File Contents

# User Rev Content
1 paus 1.2 #!/usr/bin/env python
2     #---------------------------------------------------------------------------------------------------
3     # Script to identify all samples which are kept in our database.
4     #
5     # Author: C.Paus (September 23, 2008)
6     #---------------------------------------------------------------------------------------------------
7     import os,sys,getopt,re,string
8    
def findStartedDatasets(path):
    """Return the names of all samples that have been started under *path*.

    Runs the external ``list`` command on *path*. For every output line the
    second whitespace separated field is taken as the sample name (the first
    field is the size, which is not needed here).

    Lines with fewer than two fields (blank lines, stray messages) are skipped
    instead of aborting the script with an IndexError.
    """
    # print(...) with a single argument behaves the same under Python 2 and 3
    print(" Collecting information over started samples")
    datasetList = []

    cmd = 'list ' + path
    pipe = os.popen(cmd)                     # run command
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()

    for line in lines:
        # split() without argument removes the trailing newline and copes with
        # columns that are aligned with several blanks
        fields = line.split()
        if len(fields) < 2:
            continue
        datasetList.append(fields[1])

    return datasetList
25    
def findOngoingDatasets(path):
    """Return the sample names of all crab tasks found in the working directory.

    The ``user_remote_dir`` lines of every ``crab_*/share/crab.cfg`` below the
    current working directory are collected; *path* is accepted but not used.
    The sample name is the last '/' separated component of such a line, or the
    one before it when the last component contains 'crab_0'.
    """
    print(" Collecting information over ongoing samples")
    command = 'cat crab_[0-9]_[0-9]*_[0-9]*/share/crab.cfg |grep ^user_remote_dir'

    ongoing = []
    for rawLine in os.popen(command).readlines():   # run command
        parts = rawLine[:-1].split("/")             # drop '\n', then split the remote path
        if re.search('crab_0', parts[-1]):
            # path ends with the crab task directory, the sample is one level up
            ongoing.append(parts[-2])
        else:
            ongoing.append(parts[-1])

    return ongoing
43    
def findCompletedDatasets(path):
    """Return the samples listed in the 'Completed' file of the production.

    The file is $MIT_PROD_DIR/<mitCfg>/<version>/Completed, where mitCfg and
    version are the module level settings; *path* is accepted but not used.
    Every line of the file is one sample name. Error output of the cat command
    is discarded, so an unreadable file results in an empty list.
    """
    print(" Collecting information over completed samples")

    completedFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/Completed'
    command = 'cat ' + completedFile + ' 2> /dev/null'

    # one sample per line, the trailing '\n' is cut off
    return [entry[:-1] for entry in os.popen(command).readlines()]
58    
def inList(file,list):
    """Return True when *file* equals one of the entries of *list*, else False.

    The parameter names shadow the builtins of the same name; they are kept
    because they are part of the function's interface.
    """
    # the membership operator does the same equality scan as an explicit loop
    return file in list
64    
def cleanupCompletedList(ongoingDsetList,completedDsetList):
    """Remove samples that are still ongoing from the 'Completed' file.

    Every sample of *ongoingDsetList* that also appears in *completedDsetList*
    is reported and its line is removed from
    $MIT_PROD_DIR/<mitCfg>/<version>/Completed, which is the file that
    findCompletedDatasets() reads (mitCfg and version are the module level
    settings). Neither input list is modified.

    The file is filtered in Python with an exact comparison of the sample
    name, so characters that are special in a regular expression or in the
    shell cannot remove the wrong lines.
    """
    print(" Update completed list with ongoing list")

    stale = [dataset for dataset in ongoingDsetList if dataset in completedDsetList]
    if not stale:
        return
    for dataset in stale:
        print(' -> removing fropm completed: ' + dataset)

    completedFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/Completed'
    handle = open(completedFile)
    try:
        lines = handle.readlines()
    finally:
        handle.close()

    kept = [line for line in lines if line.rstrip('\n') not in stale]

    # write the new content next to the original and move it into place, so an
    # interrupted run does not leave a truncated 'Completed' file behind
    backupFile = completedFile + '.bak'
    handle = open(backupFile, 'w')
    try:
        handle.writelines(kept)
    finally:
        handle.close()
    os.rename(backupFile, completedFile)
74    
75     #===================================================================================================
76     # Main starts here
77     #===================================================================================================
# Usage text, shown for --help and whenever the command line cannot be parsed
usage = "".join([
    "\nUsage: findSamples.py --mitCfg=<name>\n",
    " --version=<version>\n",
    " --cmssw=<name>\n",
    " --pattern=<name>\n",
    " --exe\n",
    " --noInfo\n",
    " --noDownload\n",
    " --forceCopy\n",
    " --debug\n",
    " --help\n\n",
])

# Long options understood by the script; a trailing '=' marks options that take a value
valid = ['mitCfg=', 'version=', 'cmssw=', 'pattern=',
         'help', 'exe', 'noInfo', 'noDownload', 'forceCopy', 'debug']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError:
    # fetch the exception through sys.exc_info() so the same source runs under Python 2 and 3
    ex = sys.exc_info()[1]
    print(usage)
    print(str(ex))
    sys.exit(1)
99    
# --------------------------------------------------------------------------------------------------
# Get all parameters for the production
# --------------------------------------------------------------------------------------------------
# Defaults, used for every option that is not given on the command line
mitCfg     = 'filefi'
version    = '014'
cmssw      = ''
pattern    = ''
cmsswCfg   = 'cmssw.cfg'
exe        = 0
noInfo     = False
noDownload = False
forceCopy  = False
debug      = False

# Overwrite the defaults with what was given on the command line
for opt, arg in opts:
    if opt == "--help":
        print(usage)
        sys.exit(0)
    elif opt == "--mitCfg":
        mitCfg = arg
    elif opt == "--version":
        version = arg
    elif opt == "--cmssw":
        cmssw = arg
    elif opt == "--pattern":
        pattern = arg
    elif opt == "--exe":
        exe = 1
    elif opt == "--noInfo":
        noInfo = True
    elif opt == "--noDownload":
        noDownload = True
    elif opt == "--forceCopy":
        forceCopy = True
    elif opt == "--debug":
        debug = True
138    
# Read parameters needed: locate the crab and the cmssw configuration of the production in
# $MIT_PROD_DIR/<mitCfg>/<version>/
cfgDir = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/'

crabFile = cfgDir + 'crab.cfg'
if not os.path.exists(crabFile):
    raise RuntimeError("Crab file not found: %s" % crabFile)

# the cmssw configuration is looked for as cmssw.cfg first and as cmssw.py second
cmsswFile = cfgDir + cmsswCfg
if not os.path.exists(cmsswFile):
    missing = [cmsswFile]
    cmsswCfg = 'cmssw.py'
    cmsswFile = cfgDir + cmsswCfg
    if not os.path.exists(cmsswFile):
        missing.append(cmsswFile)
        # keep the established marker text, but also say which files were tried
        raise RuntimeError(" XXXX ERROR no valid configuration found XXXX" +
                           " Cmssw file not found: " + ", ".join(missing))
153    
# Find all started samples. They are looked for in the pnfs area, unless crab.cfg has a
# storage_element line that mentions cern.ch, in which case the castor area is used.
cmd = 'grep ^storage_element ' + crabFile + '| grep cern.ch'
if os.popen(cmd).readlines():
    path = '/castor/cern.ch/user/p/paus/' + mitCfg + '/' + version
else:
    path = '/pnfs/cmsaf.mit.edu/t2bat/cms/store/user/paus/' + mitCfg + '/' + version

startedDsetList = findStartedDatasets(path)

# Samples that still have a crab task must not be listed as completed: clean up the bookkeeping
# first and read the completed list once more afterwards.
ongoingDsetList = findOngoingDatasets(path)
completedDsetList = findCompletedDatasets(path)
cleanupCompletedList(ongoingDsetList,completedDsetList)
completedDsetList = findCompletedDatasets(path)
169    
# Resolve the other mitCfg parameters from the configuration file
#
# The 'Productions' file (or 'Productions.<cmssw>' when --cmssw is given) is read line by line:
#   - empty lines are ignored,
#   - lines starting with '#' are comments; a comment whose second field is 'crontab' and whose
#     fourth field is not 1 stops the script,
#   - a line ending with a backslash is continued on the next line,
#   - a complete entry has at least five blank separated fields, of which the first (CMS dataset
#     name), the second (MIT dataset name) and the fifth (download request, '-' for none) are
#     used here.
cmd = 'cat '+ os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'Productions'
if cmssw != '':
    cmd = cmd + '.' + cmssw

print('')
join = 0
mitDataset = ""
fullLine = ""
bSlash = "\\"
for line in os.popen(cmd).readlines():                     # run command
    line = line.rstrip('\n')
    # get rid of empty lines
    if line == '':
        continue
    # get rid of commented lines and read steering parameters
    if line[0] == '#':
        names = line.split()                               # splitting every blank
        # names[3] is read, so at least four fields have to be there
        if len(names) > 3 and names[1] == 'crontab' and int(names[3]) != 1:
            print('No crontab has been set.... (LINE: ' + line + ')')
            sys.exit(0)
        continue

    # join lines
    if join == 1:
        fullLine += line
    else:
        fullLine = line

    # determine if finished or more is coming
    if fullLine[-1] == bSlash:
        join = 1
        fullLine = fullLine[:-1]
        continue
    join = 0

    # normalize the blanks and split the entry into its fields
    fullLine = " ".join(str(fullLine).split()).strip()
    names = fullLine.split()
    if debug:
        print("FullLine: " + fullLine)
    if len(names) < 5:
        # incomplete entry: report it and go on instead of dying with an IndexError
        if names:
            print(' skipping malformed line: ' + fullLine)
        continue
    cmsDataset = names[0]
    mitDataset = names[1]                # this is the equivalent MIT name of the dataset
    local = names[4]

    if pattern != '' and not re.search(pattern,mitDataset):
        continue

    cmd = 'submit.py --mitDataset=' + mitDataset + ' --mitCfg=' + mitCfg + \
          ' --version=' + version + ' --noTestJob'
    if cmssw != '':
        cmd = cmd + " --cmssw=" + cmssw

    # check for errors (to be done)

    # check for the logical combinations
    if not inList(mitDataset,startedDsetList):
        # nothing exists yet for this sample: submit it
        print(' submitting: ' + cmd)
        if exe == 1:
            os.system(cmd)
    elif inList(mitDataset,ongoingDsetList):
        print(' handled by jobSitter -- ' + mitDataset)
    elif inList(mitDataset,completedDsetList):
        if not noInfo:
            print(' don: ' + mitDataset)
    else:
        # started, but neither ongoing nor marked as completed: complete it
        cmd = cmd + ' --complete'
        print(' completing: ' + cmd)
        if exe == 1:
            os.system(cmd)

    # test download request
    if local != "-" and not noDownload:
        cmd = 'downloadSample.py --cmsDataset=' + cmsDataset + ' --mitCfg=' + mitCfg + \
              " --version=" + version
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw
        if forceCopy:
            cmd += ' --forceCopy'
        print(" " + cmd)
        if exe == 1:
            os.system(cmd)

if mitDataset == "":
    print("ERROR - dataset not defined.")
    sys.exit(0)

sys.exit(0)