ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/findSamples.py
Revision: 1.1.2.1
Committed: Sat Jun 5 01:49:21 2010 UTC (14 years, 11 months ago) by paus
Content type: text/x-python
Branch: Mit_013c
Changes since 1.1: +287 -0 lines
Log Message:
first reasonably working production environment

File Contents

# User Rev Content
1 paus 1.1.2.1 #!/usr/bin/env python
2     #---------------------------------------------------------------------------------------------------
3     # Script to identify all samples which are kept in our database.
4     #
5     # Author: C.Paus (September 23, 2008)
6     #---------------------------------------------------------------------------------------------------
7     import os,sys,getopt,re,string
8    
def findStartedDatasets(path):
    """Return the dataset names reported by the storage listing of *path*.

    Runs the external ``list`` command on *path* and reads its output, which
    is expected to carry one ``<size> <name>`` pair per line.  Only the name
    column is kept; the size column is ignored.

    Args:
        path: storage location handed to the ``list`` command.

    Returns:
        List of dataset names, in the order the command printed them.  Lines
        with fewer than two columns (e.g. blank lines) are skipped.
    """
    print(" Collecting information over started samples")
    datasetList = []

    pipe = os.popen('list ' + path)
    try:
        for line in pipe:
            # split() without an argument collapses runs of blanks (a padded
            # size column) and drops the line terminator, if there is one
            fields = line.split()
            if len(fields) < 2:
                # no name column on this line, nothing to record
                continue
            datasetList.append(fields[1])
    finally:
        # release the pipe even when reading fails half way through
        pipe.close()

    return datasetList
25    
def findOngoingDatasets(path):
    """Return the dataset names referenced by the crab tasks in the current directory.

    The ``user_remote_dir`` lines of every ``crab_*/share/crab.cfg`` file
    matched by the shell pattern below are read.  The dataset name is the last
    '/'-separated piece of such a line, unless that piece contains 'crab_0',
    in which case the piece before it is used.

    Args:
        path: not used by this function.

    Returns:
        List with one dataset name per ``user_remote_dir`` line found.
    """
    print(" Collecting information over ongoing samples")

    cmd = 'cat crab_[0-9]_[0-9]*_[0-9]*/share/crab.cfg |grep ^user_remote_dir'
    crabDirTag = re.compile('crab_0')

    def datasetOf(rawLine):
        # drop the last character (the '\n') before cutting the line at the slashes
        pieces = rawLine[:-1].split("/")
        if crabDirTag.search(pieces[-1]):
            return pieces[-2]
        return pieces[-1]

    return [datasetOf(rawLine) for rawLine in os.popen(cmd).readlines()]
43    
def findCompletedDatasets(path):
    """Return the datasets listed in the 'Completed' file of the production.

    The file ``<mitCfg>/<version>/Completed`` (``mitCfg`` and ``version`` are
    module-level settings) is read through ``cat``.  Error output of ``cat``
    is discarded, so a missing file simply yields an empty list.

    Args:
        path: not used by this function.

    Returns:
        List with one entry per line of the file.
    """
    print(" Collecting information over completed samples")

    cmd = 'cat ' + mitCfg + '/' + version + '/Completed 2> /dev/null'
    # every line is one dataset; its last character (the '\n') is cut off
    return [rawLine[:-1] for rawLine in os.popen(cmd).readlines()]
58    
def inList(file,list):
    """Tell whether *file* compares equal to at least one entry of *list*.

    Args:
        file: value to look for.
        list: iterable of entries to compare against.

    Returns:
        True if some entry equals *file*, False otherwise (also for an empty
        *list*).
    """
    return any(entry == file for entry in list)
64    
def cleanupCompletedList(ongoingDsetList,completedDsetList):
    """Drop datasets that are being processed again from the 'Completed' file.

    Every dataset of *ongoingDsetList* that also appears in
    *completedDsetList* is reported, and the lines that consist of exactly
    such a dataset name are removed from ``<mitCfg>/<version>/Completed``
    (``mitCfg`` and ``version`` are module-level settings).

    The file is filtered in Python rather than through ``grep``, so dataset
    names are compared literally and never interpreted as a regular
    expression or by the shell.  The file is rewritten at most once.

    Args:
        ongoingDsetList: names of the datasets currently being processed.
        completedDsetList: names currently listed as completed.

    Returns:
        None.  If nothing has to be removed, or the file cannot be read, the
        file is left untouched.
    """
    print(" Update completed list with ongoing list")

    completedSet = set(completedDsetList)
    staleSet = set()
    for dataset in ongoingDsetList:
        if dataset in completedSet:
            print(' -> removing fropm completed: ' + dataset)
            staleSet.add(dataset)
    if not staleSet:
        return

    completedFile = mitCfg + '/' + version + '/Completed'
    try:
        with open(completedFile) as handle:
            lines = handle.readlines()
    except IOError:
        # without a readable file there is nothing to clean up
        return

    # write the filtered copy next to the original, then swap it in
    backupFile = completedFile + '.bak'
    with open(backupFile, 'w') as handle:
        handle.writelines([entry for entry in lines
                           if entry.rstrip('\n') not in staleSet])
    os.rename(backupFile, completedFile)
74    
75     #===================================================================================================
76     # Main starts here
77     #===================================================================================================
78     # Define string to explain usage of the script
usage = (
    "\nUsage: findSamples.py --mitCfg=<name>\n"
    " --version=<version>\n"
    " --cmssw=<name>\n"
    " --exe\n"
    " --noInfo\n"
    " --noDownload\n"
    " --forceCopy\n"
    " --debug\n"
    " --help\n\n"
)

# Long options only: those taking a value carry a trailing '=', the rest are plain switches
valueOptions = ['mitCfg', 'version', 'cmssw']
switchOptions = ['help', 'exe', 'noInfo', 'noDownload', 'forceCopy', 'debug']
valid = [name + '=' for name in valueOptions] + switchOptions
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError as ex:
    # unknown option or missing value: show the usage, the reason, and give up
    print(usage)
    print(str(ex))
    sys.exit(1)
97    
98     # --------------------------------------------------------------------------------------------------
99     # Get all parameters for the production
100     # --------------------------------------------------------------------------------------------------
101     # Set defaults for each option
mitCfg = 'filefi'
version = '013'
cmssw = ''
cmsswCfg = 'cmssw.cfg'
exe = 0
noInfo = noDownload = forceCopy = debug = False

# Replace the defaults by whatever was given on the command line
for opt, arg in opts:
    if opt == "--help":
        # asking for help only prints the usage and stops
        print(usage)
        sys.exit(0)
    elif opt == "--mitCfg":
        mitCfg = arg
    elif opt == "--version":
        version = arg
    elif opt == "--cmssw":
        cmssw = arg
    elif opt == "--exe":
        exe = 1
    elif opt == "--noInfo":
        noInfo = True
    elif opt == "--noDownload":
        noDownload = True
    elif opt == "--forceCopy":
        forceCopy = True
    elif opt == "--debug":
        debug = True
133    
134     # Read parameters needed
crabFile = mitCfg + '/' + version + '/' + 'crab.cfg'
if not os.path.exists(crabFile):
    raise RuntimeError("Crab file not found: %s" % crabFile)

# The cmssw configuration may exist as 'cmssw.cfg' or, failing that, as 'cmssw.py'
cmsswFile = mitCfg + '/' + version + '/' + cmsswCfg
if not os.path.exists(cmsswFile):
    missingFiles = [cmsswFile]
    cmsswCfg = 'cmssw.py'
    cmsswFile = mitCfg + '/' + version + '/' + cmsswCfg
    if not os.path.exists(cmsswFile):
        missingFiles.append(cmsswFile)
        # name the files that were looked for, so the failure can be acted upon
        raise RuntimeError(" XXXX ERROR no valid configuration found XXXX"
                           " (Cmssw file not found: %s)" % ', '.join(missingFiles))
148    
149     # Find all started samples
# The crab file decides where the samples live: a storage_element line that mentions
# cern.ch selects the castor area, anything else the pnfs area
cmd = 'grep ^storage_element ' + crabFile + '| grep cern.ch'
if os.popen(cmd).readlines():
    path = '/castor/cern.ch/user/p/paus/' + mitCfg + '/' + version
else:
    path = '/pnfs/cmsaf.mit.edu/t2bat/cms/store/user/paus/' + mitCfg + '/' + version

startedDsetList = findStartedDatasets(path)
ongoingDsetList = findOngoingDatasets(path)

# Read the completed list, take the ongoing datasets out of it, then read it again
# so that the list used below reflects the cleaned-up file
completedDsetList = findCompletedDatasets(path)
cleanupCompletedList(ongoingDsetList, completedDsetList)
completedDsetList = findCompletedDatasets(path)
164    
165     # Resolve the other mitCfg parameters from the configuration file
cmd = 'cat ' + mitCfg + '/' + version + '/' + 'Productions'
if cmssw != '':
    cmd = cmd + '.' + cmssw

print('')
join = 0          # 1 while an entry continued with a backslash is still being assembled
mitDataset = ""
fullLine = ""
bSlash = "\\"
# NOTE(review): the nesting of this loop was reconstructed from a listing without
# indentation; an entry is assumed to be handled only once its last continuation
# line has been read -- confirm against the repository version.
for line in os.popen(cmd).readlines():
    line = line.rstrip('\n')
    # get rid of empty lines
    if line == '':
        continue
    # get rid of commented lines and read steering parameters
    if line[0] == '#':
        names = line.split()
        # the crontab value is the 4th token, so at least 4 tokens have to be there
        if len(names) > 3 and names[1] == 'crontab' and int(names[3]) != 1:
            print('No crontab has been set.... (LINE: ' + line + ')')
            sys.exit(0)
        continue

    # join lines
    if join == 1:
        fullLine += line
    else:
        fullLine = line

    # a trailing backslash announces a continuation: keep collecting
    if fullLine[-1] == bSlash:
        join = 1
        fullLine = fullLine[:-1]
        continue
    join = 0

    # the entry is complete: normalise the blanks and cut it into its fields
    fullLine = " ".join(str(fullLine).split()).strip()
    names = fullLine.split()
    if debug:
        print("FullLine: " + fullLine)
    cmsDataset = names[0]
    mitDataset = names[1]        # this is the equivalent MIT name of the dataset
    nevents = int(names[2])      # number of events to be used in the production
    local = names[4]             # download target, '-' when no download is requested

    cmd = 'submit.py --mitDataset=' + mitDataset + ' --mitCfg=' + mitCfg + \
          ' --version=' + version + ' --noTestJob'
    if cmssw != '':
        cmd = cmd + " --cmssw=" + cmssw

    # check for errors (to be done)

    # decide what to do from the lists the dataset shows up in
    if not inList(mitDataset, startedDsetList):
        # never started: submit it
        print(' submitting: ' + cmd)
        if exe == 1:
            os.system(cmd)
    elif inList(mitDataset, ongoingDsetList):
        # a crab task for it exists: nothing to do here
        print(' handled by jobSitter -- ' + mitDataset)
    elif inList(mitDataset, completedDsetList):
        if not noInfo:
            print(' don: ' + mitDataset)
    else:
        # started, neither ongoing nor marked completed: run the completion
        cmd = cmd + ' --complete'
        print(' completing: ' + cmd)
        if exe == 1:
            os.system(cmd)

    # test download request
    if local != "-" and not noDownload:
        cmd = 'downloadSample.py --cmsDataset=' + cmsDataset + ' --mitCfg=' + mitCfg + \
              " --version=" + version
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw
        if forceCopy:
            cmd += ' --forceCopy'
        print(" " + cmd)
        if exe == 1:
            os.system(cmd)
256    
257     ## if procStatus == "new":
258     ## print " " + cmd
259     ## if exe == 1:
260     ## status = os.system(cmd)
261     ## elif procStatus == "com":
262     ## cmd = cmd + ' --noTestJob --complete'
263     ## print " " + cmd
264     ## if exe == 1:
265     ## status = os.system(cmd)
266     ## elif local != "-" and not noDownload:
267     ## localPath = local
268     ## cmd = 'downloadSample.py --cmsDataset=' + cmsDataset + ' --mitCfg=' + mitCfg + \
269     ## " --version=" + version
270     ## if forceCopy:
271     ## cmd += ' --forceCopy'
272     ## print " " + cmd
273     ## if exe == 1:
274     ## status = os.system(cmd)
275     ## else:
276     ## if not noInfo:
277     ## print " Sample Info: " + fullLine
278    
if mitDataset == "":
    # NOTE(review): this error path exits with status 0, same as success --
    # confirm whether callers rely on that before changing it
    print("ERROR - dataset not defined.")
    sys.exit(0)

# The script ends here; the 'Preparing dataset' print that used to follow this
# exit could never be reached and has been removed.
sys.exit(0)