ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/findSamples.py
Revision: 1.10
Committed: Tue Mar 22 02:48:51 2011 UTC (14 years, 1 month ago) by paus
Content type: text/x-python
Branch: MAIN
CVS Tags: Mit_024b, Mit_025pre1, Mit_024a, Mit_024, Mit_023, Mit_022a, Mit_022, Mit_020d, TMit_020d, Mit_020c, Mit_021, Mit_021pre2, Mit_021pre1, Mit_020b, Mit_020a, Mit_020
Changes since 1.9: +2 -2 lines
Log Message:
Version 020 updates (64 bit architecture).

File Contents

# User Rev Content
#!/usr/bin/env python
#---------------------------------------------------------------------------------------------------
# Script to identify all samples which are kept in our database.
#
# Author: C.Paus (September 23, 2008)
#---------------------------------------------------------------------------------------------------
import getopt
import os
import re
import string
import sys
8    
def findStartedDatasets(path):
    """Return the dataset names reported by the external 'list' command for path.

    Each output line of 'list <path>' is split on whitespace; the first field is taken to be
    the size and the second field the dataset (file) name. Lines with fewer than two fields are
    skipped instead of raising an IndexError.

    path    -- storage directory handed to the 'list' command
    returns -- list of dataset names (strings) in the order they were reported

    Reads the module-level 'debug' flag set by the option parsing of the script.
    """
    if debug == 1:
        print(" Collecting information over started samples")
    datasetList = []

    cmd = 'list ' + path
    pipe = os.popen(cmd)                                 # run command
    try:
        for line in pipe:
            # split() without argument tolerates leading blanks and repeated separators
            f = line.split()
            if len(f) < 2:
                continue
            datasetList.append(f[1])
    finally:
        pipe.close()

    return datasetList
26    
def findOngoingDatasets(path):
    """Return the dataset names of all crab tasks found in the current working directory.

    The 'user_remote_dir' lines of every crab_[0-9]_[0-9]*_[0-9]*/share/crab.cfg are collected
    and the last '/'-separated component of each line is used as dataset name. When that last
    component matches 'crab_0' the component before it is used instead.

    path    -- not used; kept so all find*Datasets functions share the same signature
    returns -- list of dataset names (strings), one per matching configuration line

    Reads the module-level 'debug' flag set by the option parsing of the script.
    """
    if debug == 1:
        print(" Collecting information over ongoing samples")
    datasetList = []

    cmd = 'cat crab_[0-9]_[0-9]*_[0-9]*/share/crab.cfg |grep ^user_remote_dir'
    pipe = os.popen(cmd)                                 # run command
    try:
        for line in pipe:
            # remove only the line terminator, a last line without '\n' must stay intact
            f = line.rstrip('\n').split("/")
            dataset = f[-1]
            # guard on len(f): a value without any '/' has no previous component to fall back to
            if re.search('crab_0',dataset) and len(f) > 1:
                dataset = f[-2]
            datasetList.append(dataset)
    finally:
        pipe.close()

    return datasetList
45    
def findCompletedDatasets(path):
    """Return the dataset names listed in the 'Completed' file of the current production.

    The file read is $MIT_PROD_DIR/<mitCfg>/<version>/Completed with one dataset name per line.
    A missing or unreadable file results in an empty list (the former 'cat ... 2> /dev/null'
    behaved the same way).

    path    -- not used; kept so all find*Datasets functions share the same signature
    returns -- list of dataset names (strings) in file order

    Reads the module-level 'debug', 'mitCfg' and 'version' variables of the script.
    """
    if debug == 1:
        print(" Collecting information over completed samples")
    datasetList = []

    completedFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/Completed'
    try:
        handle = open(completedFile)
    except IOError:
        return datasetList

    try:
        for line in handle:
            # remove only the line terminator: a last line without '\n' keeps its last character
            datasetList.append(line.rstrip('\n'))
    finally:
        handle.close()

    return datasetList
61    
def inList(file,list):
    """Return True if any entry of list compares equal to file, False otherwise.

    file    -- value to look for
    list    -- iterable of entries; every entry is compared with '=='
    returns -- bool

    The parameter names shadow builtins but are kept because they are part of the interface.
    """
    return any(entry == file for entry in list)
67    
def cleanupCompletedList(ongoingDsetList,completedDsetList):
    """Remove every ongoing dataset from the 'Completed' file of the current production.

    For each dataset of ongoingDsetList that is also part of completedDsetList a message is
    printed and the matching line is dropped from $MIT_PROD_DIR/<mitCfg>/<version>/Completed,
    which is the file findCompletedDatasets() reads.

    ongoingDsetList   -- dataset names which are presently being processed
    completedDsetList -- dataset names presently recorded as completed
    returns           -- None; the lists passed in are not modified

    Reads the module-level 'debug', 'mitCfg' and 'version' variables of the script.
    """
    if debug == 1:
        print(" Update completed list with ongoing list")

    staleDsetList = []
    for dataset in ongoingDsetList:
        if inList(dataset,completedDsetList):
            print(' -> removing fropm completed: ' + dataset)
            staleDsetList.append(dataset)
    if not staleDsetList:
        return

    # same location as used for reading the list, not a path relative to the working directory
    completedFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/Completed'
    try:
        handle = open(completedFile)
    except IOError:
        print(' -> cannot read completed list: ' + completedFile)
        return
    try:
        lines = handle.readlines()
    finally:
        handle.close()

    # exact string comparison: dataset names are no longer interpreted as regular expressions
    keptLines = [ line for line in lines if not inList(line.rstrip('\n'),staleDsetList) ]

    # write next to the target and rename so the list is never left half written
    backupFile = completedFile + '.bak'
    handle = open(backupFile,'w')
    try:
        handle.writelines(keptLines)
    finally:
        handle.close()
    os.rename(backupFile,completedFile)
78    
#===================================================================================================
# Main starts here
#===================================================================================================
# Define string to explain usage of the script
usage  = "\nUsage: findSamples.py --mitCfg=<name>\n"
usage += " --version=<version> [ default: MIT_VERS ]\n"
usage += " --cmssw=<name>\n"
usage += " --pattern=<name>\n"
usage += " --download=<int: -1,0,1>\n"
usage += " --status=<int: -1,0,1>\n"
usage += " --useExistingLfns\n"
usage += " --exe\n"
usage += " --complete\n"
usage += " --noInfo\n"
usage += " --forceCopy\n"
usage += " --debug\n"
usage += " --help\n\n"

# Define the valid options which can be specified and check out the command line
valid = ['mitCfg=','version=','cmssw=','pattern=','download=','status=', \
         'help','exe','useExistingLfns','complete','noInfo','forceCopy','debug']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError:
    # sys.exc_info() instead of 'except X, ex' / 'except X as ex': valid for any python version
    ex = sys.exc_info()[1]
    print(usage)
    print(str(ex))
    sys.exit(1)

# --------------------------------------------------------------------------------------------------
# Get all parameters for the production
# --------------------------------------------------------------------------------------------------
# Set defaults for each option
mitCfg          = 'filefi'
# get() instead of []: a missing MIT_VERS must not break --help or an explicit --version=
version         = os.environ.get('MIT_VERS','')
cmssw           = ''
pattern         = ''
cmsswCfg        = 'cmssw.cfg'
exe             = 0
useExistingLfns = False
complete        = 0
noInfo          = False
download        = -1
status          = -1
forceCopy       = False
debug           = False

# Read new values from the command line
for opt, arg in opts:
    if   opt == "--help":
        print(usage)
        sys.exit(0)
    elif opt == "--mitCfg":
        mitCfg          = arg
    elif opt == "--version":
        version         = arg
    elif opt == "--cmssw":
        cmssw           = arg
    elif opt == "--pattern":
        pattern         = arg
    elif opt == "--exe":
        exe             = 1
    elif opt == "--useExistingLfns":
        useExistingLfns = True
    elif opt == "--download":
        download        = int(arg)
    elif opt == "--status":
        status          = int(arg)
    elif opt == "--complete":
        complete        = 1
    elif opt == "--noInfo":
        noInfo          = True
    elif opt == "--forceCopy":
        forceCopy       = True
    elif opt == "--debug":
        debug           = True
153 paus 1.2
# Read parameters needed: the production needs a crab configuration and a cmssw configuration
crabFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'crab.cfg'
if not os.path.exists(crabFile):
    raise RuntimeError("Crab file not found: %s" % crabFile)

cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmsswCfg
if not os.path.exists(cmsswFile):
    # no cmssw.cfg available, fall back to the python style configuration
    cmsswCfg  = 'cmssw.py'
    cmsswFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + cmsswCfg
    if not os.path.exists(cmsswFile):
        raise RuntimeError(" XXXX ERROR no valid configuration found XXXX")
168    
# Find all started samples: default storage location is the local Tier-2 ...
path = '/pnfs/cmsaf.mit.edu/t2bat/cms/store/user/paus/' + mitCfg + '/' + version
# ... unless the crab configuration names a storage element at cern.ch
cmd = 'grep ^storage_element ' + crabFile + '| grep cern.ch'
if os.popen(cmd).readlines():
    path = '/castor/cern.ch/user/p/paus/' + mitCfg + '/' + version

startedDsetList   = findStartedDatasets(path)
#print " Dataset list: "
#for dataset in startedDsetList:
#    print ' -> ' + dataset

ongoingDsetList   = findOngoingDatasets(path)
completedDsetList = findCompletedDatasets(path)
# drop datasets which are being processed again from the completed list and read it back
cleanupCompletedList(ongoingDsetList,completedDsetList)
completedDsetList = findCompletedDatasets(path)
184    
# Resolve the other mitCfg parameters from the configuration file
cmd = 'cat '+ os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'Productions'
if cmssw != '':
    cmd = cmd + '.' + cmssw

print('')

join       = 0
mitDataset = ""
fullLine   = ""
bSlash     = "\\"
printOpt   = "-header"
for line in os.popen(cmd).readlines():                   # run command
    # remove only the line terminator: a last line without '\n' keeps its last character
    line = line.rstrip('\n')
    #print 'Line: "' + line + '"'
    # get rid of empty lines
    if line == '':
        continue
    # get rid of commented lines and read steering parameters
    if line[0] == '#':
        names = line.split()                             # splitting every blank
        # names[3] is read, so at least four fields are required
        if len(names) > 3 and names[1] == 'crontab' and int(names[3]) != 1:
            print('No crontab has been set.... (LINE: ' + line + ')')
            sys.exit(0)
        continue

    # join lines
    if join == 1:
        fullLine += line
    else:
        fullLine  = line

    # determine if finished or more is coming
    if fullLine[-1] == bSlash:
        join     = 1
        fullLine = fullLine[:-1]
        continue
    join     = 0
    fullLine = " ".join(fullLine.split())

    # decode the production line
    names = fullLine.split()                             # splitting every blank
    if debug == True:
        print("FullLine: " + fullLine)
    if len(names) < 5:
        print(' WARNING - incomplete production line skipped: ' + fullLine)
        continue
    cmsDataset = names[0]
    mitDataset = names[1]                                # this is the equivalent MIT name of the dataset
    nevents    = int(names[2])                           # number of events to be used in the production
    procStatus = names[3]
    local      = names[4]

    if pattern != '' and not re.search(pattern,mitDataset):
        continue

    # make sure we want to consider submission
    # NOTE(review): indentation was lost in the revision listing this was recovered from; the
    #               submission logic is kept inside this test so that 'cmd' is always the submit
    #               command built right here -- confirm against the repository
    if download != 1 and status != 1:
        cmd = 'submit.py --mitDataset=' + mitDataset + ' --mitCfg=' + mitCfg + \
              ' --version=' + version + ' --noTestJob'
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw
        if useExistingLfns:
            cmd = cmd + " --useExistingLfns"

        # check for errors (to be done)

        # check for the logical combinations
        if   not inList(mitDataset,startedDsetList):
            #print ' new: ' + mitDataset
            print(' submitting: ' + cmd)
            if exe == 1:
                os.system(cmd)

        elif inList(mitDataset,ongoingDsetList):
            #print ' sub: ' + mitDataset
            print(' handled by jobSitter -- ' + mitDataset)

        elif inList(mitDataset,completedDsetList):
            if not noInfo:
                print(' don: ' + mitDataset)

        else:
            # started, neither ongoing nor completed
            # NOTE(review): presumably only the option depends on --complete -- confirm
            if complete == 1:
                cmd = cmd + ' --complete'
            #print ' toc: ' + mitDataset
            print(' completing: ' + cmd)
            if exe == 1:
                os.system(cmd)

    # test status request
    if status != -1:
        cmd = 'status.sh ' + mitCfg + '/' + version + ' ' + mitDataset + ' ' + printOpt
        if exe == 1:
            rc = os.system(cmd)
        else:
            print(" " + cmd)
        printOpt = ""                                    # header option only for the first dataset

    # test download request
    if local != "-" and download != -1:
        cmd = 'downloadSample.py --cmsDataset=' + cmsDataset + ' --mitCfg=' + mitCfg + \
              " --version=" + version
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw
        if forceCopy:
            cmd += ' --forceCopy'
        print(" " + cmd)
        if exe == 1:
            rc = os.system(cmd)

# mitDataset is still empty if not a single production line was decoded
if mitDataset == "":
    print("ERROR - dataset not defined.")
    sys.exit(0)

sys.exit(0)
298     sys.exit(0)