1 |
paus |
1.1.2.1 |
#!/usr/bin/env python
|
2 |
|
|
#---------------------------------------------------------------------------------------------------
|
3 |
|
|
# Script to identify all samples which are kept in our database.
|
4 |
|
|
#
|
5 |
|
|
# Author: C.Paus (September 23, 2008)
|
6 |
|
|
#---------------------------------------------------------------------------------------------------
|
7 |
|
|
import os,sys,getopt,re,string
|
8 |
|
|
|
9 |
|
|
def findStartedDatasets(path):
    """Return the names of all datasets the ``list`` command reports under *path*.

    The external ``list`` command is run on *path*; each output line is expected to hold a size
    followed by a name. Only the name (second field) is kept.

    Args:
        path: storage directory that is appended to the ``list`` command line.

    Returns:
        List of dataset names in the order they were printed by the command.
    """
    print(" Collecting information over started samples")
    datasetList = []

    cmd = 'list ' + path
    pipe = os.popen(cmd)                                # run command
    try:
        for line in pipe:
            # split on any run of whitespace: copes with padded columns and with a last line
            # that has no terminating newline
            fields = line.split()
            if len(fields) < 2:
                # blank or malformed line, there is no name to record
                continue
            datasetList.append(fields[1])
    finally:
        pipe.close()

    return datasetList
|
25 |
|
|
|
26 |
|
|
def findOngoingDatasets(path):
    """Return the dataset names referenced by the crab task directories in the current directory.

    Every ``user_remote_dir`` line found in ``crab_*/share/crab.cfg`` is split at '/'. The last
    path component is the dataset name, unless it contains 'crab_0', in which case the component
    before it is used.

    Args:
        path: not used by this function; kept so the signature matches its siblings.

    Returns:
        List with one dataset name per matching configuration line.
    """
    print(" Collecting information over ongoing samples")

    def datasetOf(rawLine):
        # drop the trailing '\n' and look at the path components
        pieces = rawLine[:-1].split("/")
        if 'crab_0' in pieces[-1]:
            return pieces[-2]
        return pieces[-1]

    cmd = 'cat crab_[0-9]_[0-9]*_[0-9]*/share/crab.cfg |grep ^user_remote_dir'
    return [datasetOf(rawLine) for rawLine in os.popen(cmd).readlines()]
|
43 |
|
|
|
44 |
|
|
def findCompletedDatasets(path):
    """Return the dataset names listed in ``<mitCfg>/<version>/Completed``.

    The file is read directly (one dataset name per line). Empty lines are ignored and a final
    line without a terminating newline is kept intact. A missing or unreadable file yields an
    empty list without any error message.

    Args:
        path: not used by this function; the file location comes from the module-level
            ``mitCfg`` and ``version`` settings.

    Returns:
        List of dataset names in file order.
    """
    print(" Collecting information over completed samples")
    datasetList = []

    completedFile = mitCfg + '/' + version + '/Completed'
    try:
        with open(completedFile) as handle:
            for line in handle:
                dataset = line.rstrip('\n')             # strip '\n' only if it is there
                if dataset != '':
                    datasetList.append(dataset)
    except IOError:
        # nothing has been recorded as completed (or the file cannot be read)
        return []

    return datasetList
|
58 |
|
|
|
59 |
|
|
def inList(file,list):
    """Return True when some entry of *list* compares equal to *file*, otherwise False."""
    return any(entry == file for entry in list)
|
64 |
|
|
|
65 |
|
|
def cleanupCompletedList(ongoingDsetList,completedDsetList):
    """Remove every ongoing dataset from the ``<mitCfg>/<version>/Completed`` file.

    A dataset is removed when it appears in both lists. Lines are compared literally against the
    dataset name (no regular expression), and the file is rewritten once, through a temporary
    file next to it that is then renamed over the original.

    Args:
        ongoingDsetList: names of the datasets that are currently being processed.
        completedDsetList: names currently listed as completed.

    Returns:
        None. The Completed file is left untouched when nothing has to be removed or when it
        cannot be read.
    """
    print(" Update completed list with ongoing list")

    stale = []
    for dataset in ongoingDsetList:
        if dataset in completedDsetList and dataset not in stale:
            print(' -> removing from completed: ' + dataset)
            stale.append(dataset)
    if not stale:
        return

    completedFile = mitCfg + '/' + version + '/Completed'
    try:
        with open(completedFile) as handle:
            lines = handle.readlines()
    except IOError:
        # no readable Completed file, so there is nothing to clean up
        return

    # exact, whole-line comparison: 'a.b' must not also remove 'axb'
    kept = [line for line in lines if line.rstrip('\n') not in stale]

    tmpFile = completedFile + '.tmp'
    with open(tmpFile, 'w') as handle:
        handle.writelines(kept)
    os.rename(tmpFile, completedFile)
|
74 |
|
|
|
75 |
|
|
#===================================================================================================
# Main starts here
#===================================================================================================
# Define string to explain usage of the script
usage = "\nUsage: findSamples.py --mitCfg=<name>\n"
usage += " --version=<version>\n"
usage += " --cmssw=<name>\n"
usage += " --exe\n"
usage += " --noInfo\n"
usage += " --noDownload\n"
usage += " --forceCopy\n"
usage += " --debug\n"
usage += " --help\n\n"

# Define the valid options which can be specified and check out the command line
valid = ['mitCfg=','version=','cmssw=','help','exe','noInfo','noDownload','forceCopy','debug']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError as ex:
    print(usage)
    print(str(ex))
    sys.exit(1)

# --------------------------------------------------------------------------------------------------
# Get all parameters for the production
# --------------------------------------------------------------------------------------------------
# Set defaults for each option
mitCfg = 'filefi'
version = '013'
cmssw = ''
cmsswCfg = 'cmssw.cfg'
exe = 0
noInfo = False
noDownload = False
forceCopy = False
debug = False

# Read new values from the command line
for opt, arg in opts:
    if opt == "--help":
        print(usage)
        sys.exit(0)
    if opt == "--mitCfg":
        mitCfg = arg
    if opt == "--version":
        version = arg
    if opt == "--cmssw":
        cmssw = arg
    if opt == "--exe":
        exe = 1
    if opt == "--noInfo":
        noInfo = True
    if opt == "--noDownload":
        noDownload = True
    if opt == "--forceCopy":
        forceCopy = True
    if opt == "--debug":
        debug = True

# Read parameters needed: the crab configuration is mandatory, the cmssw configuration may be
# either cmssw.cfg or cmssw.py
crabFile = mitCfg + '/' + version + '/' + 'crab.cfg'
if not os.path.exists(crabFile):
    raise RuntimeError("Crab file not found: %s" % crabFile)
cmsswFile = mitCfg + '/' + version + '/' + cmsswCfg
if not os.path.exists(cmsswFile):
    cmsswCfg = 'cmssw.py'
    cmsswFile = mitCfg + '/' + version + '/' + cmsswCfg
    if not os.path.exists(cmsswFile):
        raise RuntimeError(" XXXX ERROR no valid configuration found XXXX")

# Find all started samples; the castor location is used instead of the default one when the crab
# configuration names a cern.ch storage element
path = '/pnfs/cmsaf.mit.edu/t2bat/cms/store/user/paus/' + mitCfg + '/' + version
cmd = 'grep ^storage_element ' + crabFile + '| grep cern.ch'
if os.popen(cmd).read() != '':
    path = '/castor/cern.ch/user/p/paus/' + mitCfg + '/' + version

startedDsetList = findStartedDatasets(path)

# Ongoing datasets cannot be complete: purge them from the completed list and read it again
ongoingDsetList = findOngoingDatasets(path)
completedDsetList = findCompletedDatasets(path)
cleanupCompletedList(ongoingDsetList,completedDsetList)
completedDsetList = findCompletedDatasets(path)

# Resolve the other mitCfg parameters from the configuration file
productionsFile = mitCfg + '/' + version + '/' + 'Productions'
if cmssw != '':
    productionsFile = productionsFile + '.' + cmssw

try:
    with open(productionsFile) as handle:
        productionLines = handle.readlines()
except IOError as ex:
    # an unreadable file leaves no dataset defined, which is reported below
    print(' Cannot read productions file: ' + str(ex))
    productionLines = []

print('')
join = False                     # True while a line continued with a backslash is being collected
mitDataset = ""
fullLine = ""
bSlash = "\\"
for line in productionLines:
    line = line.rstrip('\n')

    # get rid of empty lines
    if line == '':
        continue

    # get rid of commented lines and read steering parameters
    if line[0] == '#':
        names = line.split()                            # splitting every blank
        # the crontab value is the fourth field, so at least four fields are required
        if len(names) > 3 and names[1] == 'crontab' and int(names[3]) != 1:
            print('No crontab has been set.... (LINE: ' + line + ')')
            sys.exit(0)
        continue

    # join lines
    if join:
        fullLine += line
    else:
        fullLine = line

    # determine if finished or more is coming
    if fullLine[-1] == bSlash:
        join = True
        fullLine = fullLine[:-1]
        continue
    join = False

    # normalize the white space and split the complete entry into its fields
    fullLine = " ".join(fullLine.split())
    names = fullLine.split()
    if debug:
        print("FullLine: " + fullLine)
    if len(names) < 5:
        # the five leading fields are all required; do not crash on an incomplete entry
        print(' skipping incomplete entry: ' + fullLine)
        continue

    cmsDataset = names[0]
    mitDataset = names[1]                  # this is the equivalent MIT name of the dataset
    nevents = int(names[2])                # number of events; converted to reject non-numeric input
    local = names[4]                       # download target, "-" means no download (names[3] unused)

    cmd = 'submit.py --mitDataset=' + mitDataset + ' --mitCfg=' + mitCfg + \
          ' --version=' + version + ' --noTestJob'
    if cmssw != '':
        cmd = cmd + " --cmssw=" + cmssw

    # check for errors (to be done)

    # check for the logical combinations
    if not inList(mitDataset,startedDsetList):
        # nothing exists yet for this dataset: submit it
        print(' submitting: ' + cmd)
        if exe == 1:
            os.system(cmd)
    elif inList(mitDataset,ongoingDsetList):
        print(' handled by jobSitter -- ' + mitDataset)
    elif inList(mitDataset,completedDsetList):
        if not noInfo:
            print(' don: ' + mitDataset)
    else:
        # started, but neither ongoing nor recorded as completed: run the completion step
        cmd = cmd + ' --complete'
        print(' completing: ' + cmd)
        if exe == 1:
            os.system(cmd)

    # test download request
    if local != "-" and not noDownload:
        cmd = 'downloadSample.py --cmsDataset=' + cmsDataset + ' --mitCfg=' + mitCfg + \
              " --version=" + version
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw
        if forceCopy:
            cmd += ' --forceCopy'
        print(" " + cmd)
        if exe == 1:
            os.system(cmd)

if mitDataset == "":
    # exit status 0 is kept as it was for this case
    print("ERROR - dataset not defined.")
    sys.exit(0)

# The script ends here; the 'Preparing dataset' print that used to follow was unreachable.
sys.exit(0)
|