ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/findSamples.py
Revision: 1.5
Committed: Fri Jul 30 18:41:11 2010 UTC (14 years, 9 months ago) by paus
Content type: text/x-python
Branch: MAIN
CVS Tags: Mit_017pre3, Mit_017pre2, Mit_017pre1, Mit_016, Mit_015b, Mit_015a, Mit_015, Mit_014e, Mit_014d, Mit_014c
Changes since 1.4: +50 -73 lines
Log Message:
Cleaned up and updated version.

File Contents

# Content
#!/usr/bin/env python
#---------------------------------------------------------------------------------------------------
# Script to identify all samples which are kept in our database.
#
# Author: C.Paus (September 23, 2008)
#---------------------------------------------------------------------------------------------------
import getopt
import os
import re
import string
import sys

def findStartedDatasets(path):
    """Return the sample names reported by the external ``list`` command for *path*.

    Every output line is split on single blanks; the first field is treated as the size and
    the second one is collected as the sample name.  Lines that do not contain at least two
    fields (empty lines, stray diagnostics) are skipped instead of raising IndexError.

    path -- storage directory handed verbatim to ``list`` (it becomes part of a shell command).
    """
    print(" Collecting information over started samples")
    datasetList = []

    pipe = os.popen('list ' + path)                          # run command
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()

    for line in lines:
        # strip only a real newline: the last line of the output may come without one
        fields = line.rstrip('\n').split(" ")
        if len(fields) < 2:
            continue
        datasetList.append(fields[1])

    return datasetList

def findOngoingDatasets(path):
    """Return the sample names referenced by the crab tasks found in the current directory.

    The ``user_remote_dir`` lines of all ``crab_[0-9]_[0-9]*_[0-9]*/share/crab.cfg`` files are
    collected and the last '/'-separated component of each line is taken as the sample name.
    When that component contains 'crab_0' the component before it is used instead.

    path -- not used; kept so the signature matches the other find*Datasets functions.
    """
    print(" Collecting information over ongoing samples")
    datasetList = []

    cmd = 'cat crab_[0-9]_[0-9]*_[0-9]*/share/crab.cfg |grep ^user_remote_dir'
    pipe = os.popen(cmd)                                     # run command
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()

    for line in lines:
        # strip only a real newline: the last line of the output may come without one
        parts = line.rstrip('\n').split("/")
        dataset = parts[-1]
        # step one component up, but only if there is one to step to
        if 'crab_0' in dataset and len(parts) > 1:
            dataset = parts[-2]
        datasetList.append(dataset)

    return datasetList

def findCompletedDatasets(path):
    """Return the sample names listed in $MIT_PROD_DIR/<mitCfg>/<version>/Completed.

    The file is read directly, one sample name per line; empty lines are ignored.  A missing
    or unreadable file gives an empty list, which mirrors the silent ``cat ... 2> /dev/null``
    this function used before.  Uses the module level settings ``mitCfg`` and ``version``.

    path -- not used; kept so the signature matches the other find*Datasets functions.
    Raises KeyError when the environment variable MIT_PROD_DIR is not set.
    """
    print(" Collecting information over completed samples")
    datasetList = []

    completedFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/Completed'
    try:
        handle = open(completedFile)
    except IOError:
        return datasetList

    try:
        for line in handle.readlines():
            # strip only a real newline: the last line of the file may come without one
            dataset = line.rstrip('\n')
            if dataset != '':
                datasetList.append(dataset)
    finally:
        handle.close()

    return datasetList

def inList(file,list):
    """Return True when *file* is equal to one of the entries of *list*, False otherwise.

    The parameter names shadow builtins; they are kept unchanged for existing callers.
    """
    return file in list

def cleanupCompletedList(ongoingDsetList,completedDsetList):
    """Remove every ongoing sample from the 'Completed' file of the current configuration.

    A sample that appears in both lists is reported and its line is dropped from
    <mitCfg>/<version>/Completed.  Lines are compared as exact strings (the former
    ``grep -v ^name$`` treated the sample name as a regular expression) and the file is
    rewritten once, through a temporary file next to it.  Uses the module level settings
    ``mitCfg`` and ``version``.  The lists passed in are not modified.
    """
    print(" Update completed list with ongoing list")

    completed = set(completedDsetList)
    stale = set()
    for dataset in ongoingDsetList:
        if dataset in completed:
            print(' -> removing fropm completed: ' + dataset)
            stale.add(dataset)

    if not stale:
        return

    # NOTE(review): this path is relative to the working directory while findCompletedDatasets
    # reads below $MIT_PROD_DIR -- confirm the script is always started from $MIT_PROD_DIR.
    completedFile = mitCfg + '/' + version + '/Completed'
    backupFile = completedFile + '.bak'
    try:
        handle = open(completedFile)
        try:
            lines = handle.readlines()
        finally:
            handle.close()

        handle = open(backupFile, 'w')
        try:
            handle.writelines([line for line in lines if line.rstrip('\n') not in stale])
        finally:
            handle.close()

        os.rename(backupFile, completedFile)
    except (IOError, OSError):
        # the shell pipeline used before did not stop the script on failure either
        print(' ERROR - could not update: ' + completedFile)

#===================================================================================================
# Main starts here
#===================================================================================================
# Define string to explain usage of the script
usage = "\nUsage: findSamples.py --mitCfg=<name>\n"
usage += " --version=<version>\n"
usage += " --cmssw=<name>\n"
usage += " --pattern=<name>\n"
usage += " --download=<int: -1,0,1>\n"
usage += " --exe\n"
usage += " --complete\n"
usage += " --noInfo\n"
usage += " --forceCopy\n"
usage += " --debug\n"
usage += " --help\n\n"

# Define the valid options which can be specified and check out the command line
valid = ['mitCfg=','version=','cmssw=','pattern=','download=',
         'help','exe','complete','noInfo','forceCopy','debug']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError as ex:
    # unknown option or missing argument: explain the usage and stop with an error status
    print(usage)
    print(str(ex))
    sys.exit(1)

# --------------------------------------------------------------------------------------------------
# Get all parameters for the production
# --------------------------------------------------------------------------------------------------
# Set defaults for each option
mitCfg = 'filefi'
version = '014'
cmssw = ''
pattern = ''
cmsswCfg = 'cmssw.cfg'
exe = 0
complete = 0
noInfo = False
download = -1
forceCopy = False
debug = False

# Read new values from the command line
for opt, arg in opts:
    if opt == "--help":
        print(usage)
        sys.exit(0)
    elif opt == "--mitCfg":
        mitCfg = arg
    elif opt == "--version":
        version = arg
    elif opt == "--cmssw":
        cmssw = arg
    elif opt == "--pattern":
        pattern = arg
    elif opt == "--exe":
        exe = 1
    elif opt == "--download":
        # must be an integer: it is compared with the integers 1 and -1 further down, and a
        # string would never be equal to either of them
        try:
            download = int(arg)
        except ValueError:
            print(usage)
            print("option --download requires an integer (-1, 0 or 1), got: " + arg)
            sys.exit(1)
    elif opt == "--complete":
        complete = 1
    elif opt == "--noInfo":
        noInfo = True
    elif opt == "--forceCopy":
        forceCopy = True
    elif opt == "--debug":
        debug = True

# Read parameters needed
configDir = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version

crabFile = configDir + '/' + 'crab.cfg'
if not os.path.exists(crabFile):
    raise RuntimeError("Crab file not found: %s" % crabFile)

cmsswFile = configDir + '/' + cmsswCfg
if not os.path.exists(cmsswFile):
    # the default configuration file is missing: fall back to 'cmssw.py'
    firstChoice = cmsswFile
    cmsswCfg = 'cmssw.py'
    cmsswFile = configDir + '/' + cmsswCfg
    if not os.path.exists(cmsswFile):
        # name both files that were tried, the bare banner alone does not say what is missing
        raise RuntimeError(" XXXX ERROR no valid configuration found XXXX\n" +
                           " Cmssw file not found: %s\n" % firstChoice +
                           " Cmssw file not found: %s" % cmsswFile)

# Find all started samples
path = '/pnfs/cmsaf.mit.edu/t2bat/cms/store/user/paus/' + mitCfg + '/' + version

# a storage_element line mentioning cern.ch in the crab file switches to the castor location
cmd = 'grep ^storage_element ' + crabFile + '| grep cern.ch'
pipe = os.popen(cmd)
try:
    cernLines = pipe.readlines()
finally:
    pipe.close()
if len(cernLines) > 0:
    path = '/castor/cern.ch/user/p/paus/' + mitCfg + '/' + version

startedDsetList = findStartedDatasets(path)
ongoingDsetList = findOngoingDatasets(path)

# the completed list is read again after the cleanup so it reflects the rewritten file
completedDsetList = findCompletedDatasets(path)
cleanupCompletedList(ongoingDsetList,completedDsetList)
completedDsetList = findCompletedDatasets(path)

# Resolve the other mitCfg parameters from the configuration file
productionsFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'Productions'
if cmssw != '':
    productionsFile = productionsFile + '.' + cmssw

# Read the file directly instead of going through 'cat'.  An unreadable file gives no lines,
# which ends in the "dataset not defined" message at the bottom.
productionLines = []
try:
    handle = open(productionsFile)
    try:
        productionLines = handle.readlines()
    finally:
        handle.close()
except IOError:
    print(' ERROR - cannot read productions file: ' + productionsFile)

print('')
join = 0
mitDataset = ""
fullLine = ""
bSlash = "\\"
for line in productionLines:
    # strip only a real newline: the last line of the file may come without one
    line = line.rstrip('\n')

    # get rid of empty lines
    if line == '':
        continue

    # get rid of commented lines and read steering parameters
    if line[0] == '#':
        names = line.split()                                 # splitting every blank
        # names[3] is read, so at least four fields are required
        if len(names) > 3 and names[1] == 'crontab' and int(names[3]) != 1:
            print('No crontab has been set.... (LINE: ' + line + ')')
            sys.exit(0)
        continue

    # join lines
    if join == 1:
        fullLine += line
    else:
        fullLine = line

    # a trailing backslash means more is coming: keep collecting
    if fullLine[-1] == bSlash:
        join = 1
        fullLine = fullLine[:-1]
        continue

    # the line is complete: normalize the blanks and split it into its fields
    join = 0
    fullLine = " ".join(fullLine.split())
    names = fullLine.split()                                 # splitting every blank
    if debug:
        print("FullLine: " + fullLine)

    # lines made of blanks only carry no information
    if len(names) == 0:
        continue
    # five fields are read below; report a short line instead of failing with IndexError
    if len(names) < 5:
        print(' WARNING - skipping incomplete line (5 fields expected): ' + fullLine)
        continue

    cmsDataset = names[0]
    mitDataset = names[1]                    # this is the equivalent MIT name of the dataset
    nevents = int(names[2])                  # number of events; not used below, but a
                                             # non-numeric field still fails loudly here
    procStatus = names[3]
    local = names[4]

    if pattern != '' and not re.search(pattern,mitDataset):
        continue

    # make sure we want to consider submission
    if download != 1:
        cmd = 'submit.py --mitDataset=' + mitDataset + ' --mitCfg=' + mitCfg + \
              ' --version=' + version + ' --noTestJob'
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw

        # check for errors (to be done)

        # check for the logical combinations
        if not inList(mitDataset,startedDsetList):
            print(' submitting: ' + cmd)
            if exe == 1:
                os.system(cmd)

        elif inList(mitDataset,ongoingDsetList):
            print(' handled by jobSitter -- ' + mitDataset)

        elif inList(mitDataset,completedDsetList):
            if not noInfo:
                print(' don: ' + mitDataset)

        else:
            # NOTE(review): the nesting of this branch was reconstructed from a listing without
            # indentation; only the '--complete' flag is assumed to depend on the option, the
            # command is printed (and run with --exe) either way -- confirm.
            if complete == 1:
                cmd = cmd + ' --complete'
            print(' completing: ' + cmd)
            if exe == 1:
                os.system(cmd)

    # test download request
    if local != "-" and download != -1:
        cmd = 'downloadSample.py --cmsDataset=' + cmsDataset + ' --mitCfg=' + mitCfg + \
              " --version=" + version
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw
        if forceCopy:
            cmd += ' --forceCopy'
        print(" " + cmd)
        if exe == 1:
            os.system(cmd)

# NOTE(review): this error path ends with status 0 like the regular one; kept unchanged
# because callers may rely on it.
if mitDataset == "":
    print("ERROR - dataset not defined.")
    sys.exit(0)

sys.exit(0)