ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/findSamples.py
Revision: 1.10
Committed: Tue Mar 22 02:48:51 2011 UTC (14 years, 1 month ago) by paus
Content type: text/x-python
Branch: MAIN
CVS Tags: Mit_024b, Mit_025pre1, Mit_024a, Mit_024, Mit_023, Mit_022a, Mit_022, Mit_020d, TMit_020d, Mit_020c, Mit_021, Mit_021pre2, Mit_021pre1, Mit_020b, Mit_020a, Mit_020
Changes since 1.9: +2 -2 lines
Log Message:
Version 020 updates (64 bit architecture).

File Contents

# Content
1 #!/usr/bin/env python
2 #---------------------------------------------------------------------------------------------------
3 # Script to identify all samples which are kept in our database.
4 #
5 # Author: C.Paus (September 23, 2008)
6 #---------------------------------------------------------------------------------------------------
7 import os,sys,getopt,re,string
8
def findStartedDatasets(path):
    """Return the names of the datasets that already exist below *path*.

    Runs the external ``list`` command on *path*. Every output line is expected to have the
    form ``<size> <name>``; the ``<name>`` fields are returned in output order.

    path -- storage directory, appended verbatim to the ``list`` command line.

    Reads the module-level ``debug`` flag set by the main program.
    """
    if debug == 1:
        print(" Collecting information over started samples")
    datasetList = []

    cmd = 'list ' + path
    for line in os.popen(cmd).readlines():   # run command
        # split() without argument drops the trailing newline and tolerates leading or
        # repeated blanks, which split(" ") turned into empty fields
        fields = line.split()
        if len(fields) < 2:
            # blank or malformed line: there is no name to record (used to raise IndexError)
            continue
        datasetList.append(fields[1])

    return datasetList
26
def findOngoingDatasets(path):
    """Return the dataset names of all crab tasks found in the current working directory.

    The ``user_remote_dir`` lines of every ``crab_*/share/crab.cfg`` file matched by the
    shell glob are collected. The dataset is the last '/'-separated component of such a
    line, or the one before it when the last component contains 'crab_0'.

    path -- not used by this function; kept so all find*Datasets functions share a signature.

    Reads the module-level ``debug`` flag set by the main program.
    """
    if debug == 1:
        print(" Collecting information over ongoing samples")

    def pickDataset(rawLine):
        # drop the last character (the '\n') and take the directory components apart
        parts = rawLine[:-1].split("/")
        if re.search('crab_0', parts[-1]):
            return parts[-2]
        return parts[-1]

    pipe = os.popen('cat crab_[0-9]_[0-9]*_[0-9]*/share/crab.cfg |grep ^user_remote_dir')
    return [pickDataset(rawLine) for rawLine in pipe.readlines()]
45
def findCompletedDatasets(path):
    """Return the dataset names listed in the 'Completed' file of this production.

    The file is read from $MIT_PROD_DIR/<mitCfg>/<version>/Completed, one dataset per line.
    A missing file yields an empty list because the error output of ``cat`` is discarded.

    path -- not used by this function; kept so all find*Datasets functions share a signature.

    Reads the module-level ``debug``, ``mitCfg`` and ``version`` set by the main program.
    """
    if debug == 1:
        print(" Collecting information over completed samples")
    datasetList = []

    cmd = 'cat ' + os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + \
          '/Completed 2> /dev/null'
    for line in os.popen(cmd).readlines():   # run command
        # only remove a newline that is really there: the last line of the file may come
        # without one and must not lose its final character
        dataset = line.rstrip('\n')
        if dataset == '':
            # an empty line does not name a dataset
            continue
        datasetList.append(dataset)

    return datasetList
61
def inList(file,list):
    """Return True when at least one entry of *list* compares equal to *file*, else False."""
    return any(entry == file for entry in list)
67
def cleanupCompletedList(ongoingDsetList,completedDsetList):
    """Remove every ongoing dataset from the 'Completed' file of this production.

    ongoingDsetList   -- dataset names that are currently being processed.
    completedDsetList -- dataset names currently listed as completed.

    For each dataset present in both lists the matching line is filtered out of
    $MIT_PROD_DIR/<mitCfg>/<version>/Completed with a shell pipeline. The lists themselves
    are not modified; callers have to re-read the file to see the result.

    Reads the module-level ``debug``, ``mitCfg`` and ``version`` set by the main program.
    """
    if debug == 1:
        print(" Update completed list with ongoing list")

    # The very file findCompletedDatasets() reads. A path relative to the working directory
    # only pointed to it when the script was started from $MIT_PROD_DIR.
    completedFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/Completed'

    for dataset in ongoingDsetList:
        if not inList(dataset,completedDsetList):
            continue
        print(' -> removing from completed: ' + dataset)
        # grep -v keeps all lines except the one consisting of exactly this dataset name;
        # the filtered copy (C.bak in the working directory) then replaces the original
        cmd = 'cat ' + completedFile + '|grep -v ^' + dataset + '$ > C.bak'
        cmd += '; mv C.bak ' + completedFile
        os.system(cmd)
78
79 #===================================================================================================
80 # Main starts here
81 #===================================================================================================
# Define string to explain usage of the script (every option accepted in 'valid' is listed)
usage = "\nUsage: findSamples.py --mitCfg=<name>\n"
usage += " --version=<version> [ default: MIT_VERS ]\n"
usage += " --cmssw=<name>\n"
usage += " --pattern=<name>\n"
usage += " --download=<int: -1,0,1>\n"
usage += " --status=<int: -1,0,1>\n"
usage += " --useExistingLfns\n"
usage += " --exe\n"
usage += " --complete\n"
usage += " --noInfo\n"
usage += " --forceCopy\n"
usage += " --debug\n"
usage += " --help\n\n"

# Define the valid options which can be specified and check out the command line
# (a trailing '=' marks an option that takes a value)
valid = ['mitCfg=','version=','cmssw=','pattern=','download=','status=',
         'help','exe','useExistingLfns','complete','noInfo','forceCopy','debug']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError:
    # fetch the exception through sys.exc_info() so the statement parses on every
    # python version ('except X, ex' is python 2 only)
    ex = sys.exc_info()[1]
    print(usage)
    print(str(ex))
    sys.exit(1)
105
106 # --------------------------------------------------------------------------------------------------
107 # Get all parameters for the production
108 # --------------------------------------------------------------------------------------------------
# Defaults, used for every option that is not given on the command line
mitCfg = 'filefi'
version = os.environ['MIT_VERS']
cmssw = ''
pattern = ''
cmsswCfg = 'cmssw.cfg'
exe = 0
complete = 0
download = -1
status = -1
useExistingLfns = False
noInfo = False
forceCopy = False
debug = False

# Overwrite the defaults with whatever was specified on the command line
for opt, arg in opts:
    if opt == "--help":
        print(usage)
        sys.exit(0)
    elif opt == "--mitCfg":
        mitCfg = arg
    elif opt == "--version":
        version = arg
    elif opt == "--cmssw":
        cmssw = arg
    elif opt == "--pattern":
        pattern = arg
    elif opt == "--download":
        download = int(arg)
    elif opt == "--status":
        status = int(arg)
    elif opt == "--exe":
        exe = 1
    elif opt == "--complete":
        complete = 1
    elif opt == "--useExistingLfns":
        useExistingLfns = True
    elif opt == "--noInfo":
        noInfo = True
    elif opt == "--forceCopy":
        forceCopy = True
    elif opt == "--debug":
        debug = True
153
# Read parameters needed: the crab configuration is mandatory
crabFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'crab.cfg'
if not os.path.exists(crabFile):
    cmd = "Crab file not found: %s" % crabFile
    raise RuntimeError(cmd)

# The cmssw configuration may exist as cmssw.cfg or, failing that, as cmssw.py
cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmsswCfg
if not os.path.exists(cmsswFile):
    cmsswCfg = 'cmssw.py'
    cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmsswCfg
    if not os.path.exists(cmsswFile):
        # keep the file name in the message; it used to be overwritten by the generic text
        cmd = "Cmssw file not found: %s" % cmsswFile
        cmd += " XXXX ERROR no valid configuration found XXXX"
        raise RuntimeError(cmd)
168
# Find all started samples: the storage area is on castor when the crab configuration names
# a cern.ch storage element, otherwise it is the pnfs area at MIT
cmd = 'grep ^storage_element ' + crabFile + '| grep cern.ch'
if len(os.popen(cmd).readlines()) > 0:
    path = '/castor/cern.ch/user/p/paus/' + mitCfg + '/' + version
else:
    path = '/pnfs/cmsaf.mit.edu/t2bat/cms/store/user/paus/' + mitCfg + '/' + version

startedDsetList = findStartedDatasets(path)
ongoingDsetList = findOngoingDatasets(path)

# Take ongoing samples out of the completed file, then read the completed list again so it
# reflects the cleaned up file
completedDsetList = findCompletedDatasets(path)
cleanupCompletedList(ongoingDsetList,completedDsetList)
completedDsetList = findCompletedDatasets(path)
184
# Resolve the other mitCfg parameters from the configuration file: the 'Productions' file
# (or 'Productions.<cmssw>' when --cmssw is given) has one dataset definition per line
cmd = 'cat '+ os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'Productions'
if cmssw != '':
    cmd = cmd + '.' + cmssw

print('')

join = 0                 # 1 while a definition continues on the next line
mitDataset = ""
fullLine = ""
bSlash = "\\"
printOpt = "-header"     # handed to status.sh for the first dataset only
for line in os.popen(cmd).readlines():   # run command (cmd is reused inside the loop)
    # only remove a newline that is really there; the last line may come without one
    line = line.rstrip('\n')

    # get rid of empty lines
    if line == '':
        continue

    # get rid of commented lines and read steering parameters
    if line[0] == '#':
        names = line.split()   # splitting every blank
        # names[3] is read, so at least four fields are needed (the guard used to be > 2)
        if len(names) > 3 and names[1] == 'crontab' and int(names[3]) != 1:
            print('No crontab has been set.... (LINE: ' + line + ')')
            sys.exit(0)
        continue

    # join lines
    if join == 1:
        fullLine += line
    else:
        fullLine = line

    # a trailing backslash means that more is coming: wait for the next line
    if fullLine[-1] == bSlash:
        join = 1
        fullLine = fullLine[:-1]
        continue

    # the definition is complete: normalize the white space and take it apart
    join = 0
    fullLine = " ".join(str(fullLine).split()).strip()
    names = fullLine.split()   # splitting every blank
    if not names:
        # line consisted of white space only (used to raise IndexError)
        continue
    if debug == True:
        print("FullLine: " + fullLine)
    cmsDataset = names[0]
    mitDataset = names[1]      # this is the equivalent MIT name of the dataset
    nevents = int(names[2])    # number of events to be used in the production
    procStatus = names[3]
    local = names[4]

    if pattern != '' and not re.search(pattern,mitDataset):
        continue

    # make sure we want to consider submission
    if download != 1 and status != 1:
        cmd = 'submit.py --mitDataset=' + mitDataset + ' --mitCfg=' + mitCfg + \
              ' --version=' + version + ' --noTestJob'
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw
        if useExistingLfns:
            cmd = cmd + " --useExistingLfns"

        # check for errors (to be done)

        # check for the logical combinations
        if not inList(mitDataset,startedDsetList):
            # nothing in the storage area yet: new submission
            print(' submitting: ' + cmd)
            if exe == 1:
                os.system(cmd)

        elif inList(mitDataset,ongoingDsetList):
            # a crab task exists for this dataset
            if download != 1 and status != 1:
                print(' handled by jobSitter -- ' + mitDataset)

        elif inList(mitDataset,completedDsetList):
            if not noInfo:
                print(' don: ' + mitDataset)

        else:
            # started, not ongoing and not marked completed: submit the remainder
            if complete == 1:
                cmd = cmd + ' --complete'
            print(' completing: ' + cmd)
            if exe == 1:
                os.system(cmd)

    # test status request
    if status != -1:
        cmd = 'status.sh ' + mitCfg + '/' + version + ' ' + mitDataset + ' ' + printOpt
        if exe == 1:
            os.system(cmd)
        else:
            print(" " + cmd)
        printOpt = ""

    # test download request
    if local != "-" and download != -1:
        cmd = 'downloadSample.py --cmsDataset=' + cmsDataset + ' --mitCfg=' + mitCfg + \
              " --version=" + version
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw
        if forceCopy:
            cmd += ' --forceCopy'
        print(" " + cmd)
        if exe == 1:
            os.system(cmd)

# mitDataset is still empty when the loop did not see a single dataset definition
if mitDataset == "":
    print("ERROR - dataset not defined.")
    sys.exit(0)

sys.exit(0)