ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/findSamples.py
Revision: 1.7
Committed: Sun Dec 5 01:01:21 2010 UTC (14 years, 5 months ago) by paus
Content type: text/x-python
Branch: MAIN
CVS Tags: Mit_018, Mit_017
Changes since 1.6: +58 -30 lines
Log Message:
Next iteration with improved downloading tool.

File Contents

# User Rev Content
1 paus 1.2 #!/usr/bin/env python
2     #---------------------------------------------------------------------------------------------------
3     # Script to identify all samples which are kept in our database.
4     #
5     # Author: C.Paus (September 23, 2008)
6     #---------------------------------------------------------------------------------------------------
7     import os,sys,getopt,re,string
8    
def findStartedDatasets(path):
    """Return the names of all entries the 'list' command reports for path.

    The external 'list' command is run on path and each output line is split at
    single blanks: the first field is taken to be the size, the second the
    dataset name. Lines with fewer than two fields (for instance blank lines)
    are skipped.

    path -- storage directory handed to the 'list' command

    Returns the list of dataset names in the order they were reported.
    """
    if debug == 1:
        print(" Collecting information over started samples")
    datasetList = []

    cmd = 'list ' + path
    for line in os.popen(cmd).readlines():  # run command
        # remove only the line terminator, a last line without one stays intact
        fields = line.rstrip('\n').split(" ")
        # size and name are both required, otherwise there is nothing to record
        if len(fields) < 2:
            continue
        datasetList.append(fields[1])

    return datasetList
26    
def findOngoingDatasets(path):
    """Return the datasets named in the crab configurations of the working directory.

    The 'user_remote_dir' lines of all crab_*/share/crab.cfg files matched by the
    shell pattern below are read. The dataset is the last '/' separated component
    of such a line, or the one before it when the last component contains
    'crab_0'.

    path -- not used by this function

    Returns the list of dataset names, one per matching configuration line.
    """
    if debug == 1:
        print(" Collecting information over ongoing samples")

    cmd = 'cat crab_[0-9]_[0-9]*_[0-9]*/share/crab.cfg |grep ^user_remote_dir'
    ongoing = []
    for entry in os.popen(cmd).readlines():  # run command
        pieces = entry[:-1].split("/")       # entry[:-1] drops the trailing '\n'
        # presumably a trailing crab_0* component is the task directory and the
        # dataset sits one level above it -- TODO confirm
        if re.search('crab_0',pieces[-1]):
            ongoing.append(pieces[-2])
        else:
            ongoing.append(pieces[-1])

    return ongoing
45    
def findCompletedDatasets(path):
    """Return the datasets listed in the 'Completed' file of the production.

    The file is $MIT_PROD_DIR/<mitCfg>/<version>/Completed and holds one dataset
    per line. A file that cannot be read yields an empty list because the error
    output of 'cat' is discarded.

    path -- not used by this function

    Returns the list of lines of the file, each without its last character.
    """
    if debug == 1:
        print(" Collecting information over completed samples")

    completedFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/Completed'
    cmd = 'cat ' + completedFile + ' 2> /dev/null'
    # record[:-1] strips the '\n' which ends every line
    return [ record[:-1] for record in os.popen(cmd).readlines() ]
61    
def inList(file,list):
    """Tell whether file compares equal to at least one entry of list.

    file -- value to look for
    list -- iterable of candidates, compared one by one with '=='

    Returns True on the first match and False when nothing matches.
    """
    return any(entry == file for entry in list)
67    
def cleanupCompletedList(ongoingDsetList,completedDsetList):
    """Remove every ongoing dataset from the 'Completed' file of the production.

    A dataset which is ongoing and also listed as completed is dropped from
    $MIT_PROD_DIR/<mitCfg>/<version>/Completed, the same file which
    findCompletedDatasets reads. Only lines which are exactly equal to the
    dataset name are removed.

    ongoingDsetList   -- datasets which are presently being processed
    completedDsetList -- datasets presently listed as completed (not modified)

    Returns nothing; the file is rewritten only when something has to go.
    """
    if debug == 1:
        print(" Update completed list with ongoing list")

    toRemove = []
    for dataset in ongoingDsetList:
        if dataset in completedDsetList:
            print(' -> removing from completed: ' + dataset)
            toRemove.append(dataset)
    if not toRemove:
        return

    completedFile = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/Completed'
    if not os.path.isfile(completedFile):
        return

    handle = open(completedFile)
    try:
        records = handle.readlines()
    finally:
        handle.close()

    # write next to the original and rename, so the list is never left half written
    tmpFile = completedFile + '.bak'
    handle = open(tmpFile,'w')
    try:
        handle.writelines([ r for r in records if r.rstrip('\n') not in toRemove ])
    finally:
        handle.close()
    os.rename(tmpFile,completedFile)
78    
#===================================================================================================
# Main starts here
#===================================================================================================
# Define string to explain usage of the script (every option accepted below is listed)
usage = "".join([
    "\nUsage: findSamples.py --mitCfg=<name>\n",
    " --version=<version>\n",
    " --cmssw=<name>\n",
    " --pattern=<name>\n",
    " --download=<int: -1,0,1>\n",
    " --status=<int: -1,0,1>\n",
    " --useExistingLfns\n",
    " --exe\n",
    " --complete\n",
    " --noInfo\n",
    " --forceCopy\n",
    " --debug\n",
    " --help\n\n",
    ])

# Define the valid options which can be specified and check out the command line
valid = ['mitCfg=','version=','cmssw=','pattern=','download=','status=',
         'help','exe','useExistingLfns','complete','noInfo','forceCopy','debug']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError:
    # sys.exc_info() hands out the exception without version specific syntax
    print(usage)
    print(str(sys.exc_info()[1]))
    sys.exit(1)

# --------------------------------------------------------------------------------------------------
# Get all parameters for the production
# --------------------------------------------------------------------------------------------------
# Set defaults for each option
mitCfg = 'filefi'
version = '016'
cmssw = ''
pattern = ''
cmsswCfg = 'cmssw.cfg'
exe = 0
useExistingLfns = False
complete = 0
noInfo = False
download = -1          # -1 means that no download was asked for
status = -1            # -1 means that no status report was asked for
forceCopy = False
debug = False

# Read new values from the command line
for opt, arg in opts:
    if opt == "--help":
        print(usage)
        sys.exit(0)
    elif opt == "--mitCfg":
        mitCfg = arg
    elif opt == "--version":
        version = arg
    elif opt == "--cmssw":
        cmssw = arg
    elif opt == "--pattern":
        pattern = arg
    elif opt == "--exe":
        exe = 1
    elif opt == "--useExistingLfns":
        useExistingLfns = True
    elif opt == "--download":
        download = int(arg)
    elif opt == "--status":
        status = int(arg)
    elif opt == "--complete":
        complete = 1
    elif opt == "--noInfo":
        noInfo = True
    elif opt == "--forceCopy":
        forceCopy = True
    elif opt == "--debug":
        debug = True
153 paus 1.2
# Read parameters needed: all configuration files live in one directory
configDir = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/'

crabFile = configDir + 'crab.cfg'
if not os.path.exists(crabFile):
    raise RuntimeError("Crab file not found: %s" % crabFile)

cmsswFile = configDir + cmsswCfg
if not os.path.exists(cmsswFile):
    # the default name is not there, try the python configuration instead
    cmsswCfg = 'cmssw.py'
    cmsswFile = configDir + cmsswCfg
    if not os.path.exists(cmsswFile):
        raise RuntimeError(" XXXX ERROR no valid configuration found XXXX")

# Find all started samples: the storage path moves to castor as soon as the crab
# file has a storage_element line which contains cern.ch
path = '/pnfs/cmsaf.mit.edu/t2bat/cms/store/user/paus/' + mitCfg + '/' + version
cmd = 'grep ^storage_element ' + crabFile + '| grep cern.ch'
if os.popen(cmd).readlines():
    path = '/castor/cern.ch/user/p/paus/' + mitCfg + '/' + version

startedDsetList = findStartedDatasets(path)
ongoingDsetList = findOngoingDatasets(path)

# the completed list is read again once the ongoing samples are taken out of it
completedDsetList = findCompletedDatasets(path)
cleanupCompletedList(ongoingDsetList,completedDsetList)
completedDsetList = findCompletedDatasets(path)
184    
# Resolve the other mitCfg parameters from the configuration file
cmd = 'cat '+ os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'Productions'
if cmssw != '':
    cmd = cmd + '.' + cmssw

print('')

if status == 1:
    print('BOOK SAMPLE All Done ToDo')
    print('------------------------------------------------------------------')

# Every record of the productions file holds the fields
#   <cmsDataset> <mitDataset> <nevents> <procStatus> <local>
# A record continues on the next line when the line ends with a backslash.
join = 0
mitDataset = ""
fullLine = ""
bSlash = "\\"
for line in os.popen(cmd).readlines():  # run command
    # remove only the line terminator, a last line without one stays intact
    line = line.rstrip('\n')
    # get rid of empty lines
    if line == '':
        continue
    # get rid of commented lines and read steering parameters
    if line[0] == '#':
        names = line.split()
        # the crontab flag is the fourth token, it has to exist before it is read
        if len(names) > 3 and names[1] == 'crontab' and int(names[3]) != 1:
            print('No crontab has been set.... (LINE: ' + line + ')')
            sys.exit(0)
        continue

    # join lines
    if join == 1:
        fullLine += line
    else:
        fullLine = line

    # determine if finished or more is coming
    if fullLine[-1] == bSlash:
        join = 1
        fullLine = fullLine[:-1]
        continue
    join = 0

    names = fullLine.split()            # splitting every blank
    if not names:                       # nothing but blanks on this line
        continue
    fullLine = " ".join(names)
    if debug:
        print("FullLine: " + fullLine)
    cmsDataset = names[0]
    mitDataset = names[1]               # this is the equivalent MIT name of the dataset
    nevents = int(names[2])             # number of events to be used in the production
    procStatus = names[3]
    local = names[4]

    if pattern != '' and not re.search(pattern,mitDataset):
        continue

    # make sure we want to consider submission; the decisions below only make sense
    # with the submit command at hand and therefore live inside this condition
    if download != 1 and status != 1:
        submitCmd = 'submit.py --mitDataset=' + mitDataset + ' --mitCfg=' + mitCfg + \
                    ' --version=' + version + ' --noTestJob'
        if cmssw != '':
            submitCmd = submitCmd + " --cmssw=" + cmssw
        if useExistingLfns:
            submitCmd = submitCmd + " --useExistingLfns"

        # check for errors (to be done)

        # check for the logical combinations
        if not inList(mitDataset,startedDsetList):
            print(' submitting: ' + submitCmd)
            if exe == 1:
                os.system(submitCmd)

        elif inList(mitDataset,ongoingDsetList):
            print(' handled by jobSitter -- ' + mitDataset)

        elif inList(mitDataset,completedDsetList):
            if not noInfo:
                print(' don: ' + mitDataset)
        else:
            # started, neither ongoing nor completed
            if complete == 1:
                submitCmd = submitCmd + ' --complete'
            print(' completing: ' + submitCmd)
            if exe == 1:
                os.system(submitCmd)

    # test status request
    if status != -1:
        statusCmd = 'status.sh ' + mitCfg + '/' + version + ' ' + mitDataset
        if exe == 1:
            os.system(statusCmd)
        else:
            print(" " + statusCmd)

    # test download request
    if local != "-" and download != -1:
        downloadCmd = 'downloadSample.py --cmsDataset=' + cmsDataset + ' --mitCfg=' + mitCfg + \
                      " --version=" + version
        if cmssw != '':
            downloadCmd = downloadCmd + " --cmssw=" + cmssw
        if forceCopy:
            downloadCmd += ' --forceCopy'
        print(" " + downloadCmd)
        if exe == 1:
            os.system(downloadCmd)

# mitDataset is still empty when the loop did not read a single record
if mitDataset == "":
    print("ERROR - dataset not defined.")
    sys.exit(0)

sys.exit(0)