#!/usr/bin/env python
#---------------------------------------------------------------------------------------------------
# Script to identify all samples which are kept in our database.
#
# Author: C.Paus (September 23, 2008)
#---------------------------------------------------------------------------------------------------
7 |
import os,sys,getopt,re,string
|
8 |
|
def findStartedDatasets(path):
    """Return the dataset names listed in the storage directory *path*.

    Runs the external ``list`` command on *path* and interprets every output
    line as ``<size> <name>`` separated by a single blank; only the name
    (second field) is collected.  Lines that do not provide both fields, such
    as blank lines, are skipped.

    Reads the module-level ``debug`` flag to decide whether to print progress.
    """
    if debug == 1:
        print(" Collecting information over started samples")
    datasetList = []

    pipe = os.popen('list ' + path)                       # run command
    try:
        for line in pipe:
            # drop only the line terminator, never a real character
            fields = line.rstrip('\n').split(" ")
            if len(fields) < 2:
                continue
            datasetList.append(fields[1])
    finally:
        pipe.close()

    return datasetList
26 |
|
def findOngoingDatasets(path):
    """Return the dataset names of all crab tasks found in the working directory.

    The names are taken from the ``user_remote_dir`` lines of every
    ``crab_*/share/crab.cfg`` below the current directory: normally the last
    path component, or the one before it when the last component contains
    'crab_0'.  *path* is not used by this function.

    Reads the module-level ``debug`` flag to decide whether to print progress.
    """
    if debug == 1:
        print(" Collecting information over ongoing samples")

    cmd = 'cat crab_[0-9]_[0-9]*_[0-9]*/share/crab.cfg |grep ^user_remote_dir'
    ongoing = []
    for rawLine in os.popen(cmd).readlines():             # run command
        parts = rawLine[:-1].split("/")                   # strip '\n', split path
        # the crab task directory may be appended after the dataset name
        idx = -2 if re.search('crab_0', parts[-1]) else -1
        ongoing.append(parts[idx])

    return ongoing
45 |
|
def findCompletedDatasets(path):
    """Return the dataset names recorded in the 'Completed' bookkeeping file.

    The file read is ``$MIT_PROD_DIR/<mitCfg>/<version>/Completed`` with one
    dataset name per line; ``mitCfg``, ``version`` and ``debug`` are
    module-level settings.  *path* is not used by this function.

    Returns an empty list when the file does not exist or cannot be opened.
    Raises KeyError when MIT_PROD_DIR is not set in the environment.
    """
    if debug == 1:
        print(" Collecting information over completed samples")

    fileName = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/Completed'
    try:
        handle = open(fileName)
    except (IOError, OSError):
        # a missing list simply means nothing has been completed yet
        return []
    try:
        # strip only the line terminator so a last line without '\n' stays intact
        return [line.rstrip('\n') for line in handle]
    finally:
        handle.close()
61 |
|
def inList(file,list):
    """Return True when *file* equals one of the entries of *list*, else False."""
    return file in list
67 |
|
def cleanupCompletedList(ongoingDsetList,completedDsetList):
    """Remove every ongoing dataset from the 'Completed' bookkeeping file.

    A dataset that appears in both *ongoingDsetList* and *completedDsetList*
    is reported and its line is dropped from
    ``$MIT_PROD_DIR/<mitCfg>/<version>/Completed``.  Lines are compared
    literally against the dataset name.  The file is left untouched when no
    dataset has to be removed.  The lists passed in are not modified.

    ``mitCfg``, ``version`` and ``debug`` are module-level settings.

    Raises KeyError when MIT_PROD_DIR is not set and IOError/OSError when the
    file cannot be read or rewritten.
    """
    if debug == 1:
        print(" Update completed list with ongoing list")

    reopened = set()
    for dataset in ongoingDsetList:
        if dataset in completedDsetList:
            print(' -> removing fropm completed: ' + dataset)
            reopened.add(dataset)
    if not reopened:
        return

    fileName = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/Completed'
    source = open(fileName)
    try:
        entries = [line.rstrip('\n') for line in source]
    finally:
        source.close()

    # write the reduced list next to the original and swap it in with one rename
    tmpName = fileName + '.bak'
    target = open(tmpName, 'w')
    try:
        for entry in entries:
            if entry not in reopened:
                target.write(entry + '\n')
    finally:
        target.close()
    os.rename(tmpName, fileName)
78 |
|
#===================================================================================================
# Main starts here
#===================================================================================================
# Define string to explain usage of the script
usage = "\nUsage: findSamples.py --mitCfg=<name>\n"
usage += " --version=<version> [ default: MIT_VERS ]\n"
usage += " --cmssw=<name>\n"
usage += " --pattern=<name>\n"
usage += " --download=<int: -1,0,1>\n"
usage += " --status=<int: -1,0,1>\n"
usage += " --useExistingLfns\n"
usage += " --exe\n"
usage += " --complete\n"
usage += " --noInfo\n"
usage += " --forceCopy\n"
usage += " --debug\n"
usage += " --help\n\n"

# Define the valid options which can be specified and check out the command line
# (names ending in '=' take a value, the others are plain switches)
valid = ['mitCfg=','version=','cmssw=','pattern=','download=','status=',
         'help','exe','useExistingLfns','complete','noInfo','forceCopy','debug']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError as ex:
    # unknown option or missing value: explain the usage and give up
    print(usage)
    print(str(ex))
    sys.exit(1)
105 |
|
# --------------------------------------------------------------------------------------------------
# Get all parameters for the production
# --------------------------------------------------------------------------------------------------
# Set defaults for each option
mitCfg = 'filefi'
# taken from the environment unless --version is given; checked after the option loop so that
# a missing MIT_VERS is only an error when it is really needed
version = os.environ.get('MIT_VERS')
cmssw = ''
pattern = ''
cmsswCfg = 'cmssw.cfg'
exe = 0
useExistingLfns = False
complete = 0
noInfo = False
download = -1
status = -1
forceCopy = False
debug = False

# Read new values from the command line
for opt, arg in opts:
    if opt == "--help":
        print(usage)
        sys.exit(0)
    elif opt == "--mitCfg":
        mitCfg = arg
    elif opt == "--version":
        version = arg
    elif opt == "--cmssw":
        cmssw = arg
    elif opt == "--pattern":
        pattern = arg
    elif opt == "--exe":
        exe = 1
    elif opt == "--useExistingLfns":
        useExistingLfns = True
    elif opt == "--download":
        download = int(arg)
    elif opt == "--status":
        status = int(arg)
    elif opt == "--complete":
        complete = 1
    elif opt == "--noInfo":
        noInfo = True
    elif opt == "--forceCopy":
        forceCopy = True
    elif opt == "--debug":
        debug = True

if version is None:
    print(usage)
    print("ERROR - no version defined: use --version=<version> or set MIT_VERS")
    sys.exit(1)
153 |
|
# Read parameters needed
# (all configuration files live in $MIT_PROD_DIR/<mitCfg>/<version>)
cfgDir = os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version

crabFile = cfgDir + '/' + 'crab.cfg'
if not os.path.exists(crabFile):
    raise RuntimeError("Crab file not found: %s" % crabFile)

cmsswFile = cfgDir + '/' + cmsswCfg
if not os.path.exists(cmsswFile):
    # no cmssw.cfg available: fall back to cmssw.py
    cmsswCfg = 'cmssw.py'
    cmsswFile = cfgDir + '/' + cmsswCfg
    if not os.path.exists(cmsswFile):
        raise RuntimeError(" XXXX ERROR no valid configuration found XXXX")
168 |
|
# Find all started samples
# The storage area is the MIT one unless crab.cfg has a storage_element line mentioning cern.ch
storageArea = '/pnfs/cmsaf.mit.edu/t2bat/cms/store/user/paus/'
cmd = 'grep ^storage_element ' + crabFile + '| grep cern.ch'
if os.popen(cmd).readlines():
    storageArea = '/castor/cern.ch/user/p/paus/'
path = storageArea + mitCfg + '/' + version

startedDsetList = findStartedDatasets(path)
ongoingDsetList = findOngoingDatasets(path)

# Ongoing datasets are taken off the completed list; the list is read again afterwards so it
# reflects the cleaned up file
completedDsetList = findCompletedDatasets(path)
cleanupCompletedList(ongoingDsetList,completedDsetList)
completedDsetList = findCompletedDatasets(path)
184 |
|
# Resolve the other mitCfg parameters from the configuration file
cmd = 'cat '+ os.environ['MIT_PROD_DIR'] + '/' + mitCfg + '/' + version + '/' + 'Productions'
if cmssw != '':
    cmd = cmd + '.' + cmssw

print('')

join = 0                       # 1 while a backslash continued line is being assembled
mitDataset = ""
fullLine = ""
bSlash = "\\"
printOpt = "-header"           # passed to status.sh for the first dataset only
for line in os.popen(cmd).readlines():                    # run command
    line = line.rstrip('\n')
    # get rid of empty lines
    if line == '':
        continue
    # get rid of commented lines and read steering parameters
    if line[0] == '#':
        names = line.split()                              # splitting every blank
        # steering line: token 1 is 'crontab', token 3 its integer value (needs four tokens)
        if len(names) > 3 and names[1] == 'crontab':
            try:
                crontab = int(names[3])
            except ValueError:
                crontab = 1                               # ordinary comment, not a steering line
            if crontab != 1:
                print('No crontab has been set.... (LINE: ' + line + ')')
                sys.exit(0)
        continue

    # join lines
    if join == 1:
        fullLine += line
    else:
        fullLine = line

    # determine if finished or more is coming
    if fullLine[-1] == bSlash:
        join = 1
        fullLine = fullLine[:-1]
        continue
    join = 0

    # normalize all white space to single blanks
    fullLine = " ".join(str(fullLine).split()).strip()
    names = fullLine.split()                              # splitting every blank
    if debug:
        print("FullLine: " + fullLine)
    if len(names) < 5:
        raise RuntimeError("Productions line needs 5 fields, found %d: %s" % (len(names), fullLine))
    cmsDataset = names[0]
    mitDataset = names[1]                                 # this is the equivalent MIT name of the dataset
    nevents = int(names[2])                               # number of events to be used in the production
    procStatus = names[3]
    local = names[4]

    if pattern != '' and not re.search(pattern,mitDataset):
        continue

    # make sure we want to consider submission
    if download != 1 and status != 1:
        cmd = ('submit.py --mitDataset=' + mitDataset + ' --mitCfg=' + mitCfg +
               ' --version=' + version + ' --noTestJob')
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw
        if useExistingLfns:
            cmd = cmd + " --useExistingLfns"

        # check for errors (to be done)

        # check for the logical combinations
        # NOTE(review): this chain is kept inside the submission guard so that 'cmd' is always
        # the submit command built above -- confirm against the intended behavior
        if not inList(mitDataset,startedDsetList):
            # dataset has never been started
            print(' submitting: ' + cmd)
            if exe == 1:
                os.system(cmd)

        elif inList(mitDataset,ongoingDsetList):
            # dataset has a crab task in the working directory
            if download != 1 and status != 1:
                print(' handled by jobSitter -- ' + mitDataset)

        elif inList(mitDataset,completedDsetList):
            # dataset is recorded as completed
            if not noInfo:
                print(' don: ' + mitDataset)
        else:
            # started, but neither ongoing nor completed
            if complete == 1:
                cmd = cmd + ' --complete'
            print(' completing: ' + cmd)
            if exe == 1:
                os.system(cmd)

    # test status request
    if status != -1:
        cmd = 'status.sh ' + mitCfg + '/' + version + ' ' + mitDataset + ' ' + printOpt
        if exe == 1:
            os.system(cmd)
        else:
            print(" " + cmd)
        printOpt = ""

    # test download request ('-' in the last field means no local copy is configured)
    if local != "-" and download != -1:
        cmd = ('downloadSample.py --cmsDataset=' + cmsDataset + ' --mitCfg=' + mitCfg +
               " --version=" + version)
        if cmssw != '':
            cmd = cmd + " --cmssw=" + cmssw
        if forceCopy:
            cmd += ' --forceCopy'
        print(" " + cmd)
        if exe == 1:
            os.system(cmd)

if mitDataset == "":
    # no dataset line was found at all
    # NOTE(review): an error is reported here but the exit status stays 0 -- confirm whether
    # callers rely on that before changing it
    print("ERROR - dataset not defined.")
    sys.exit(0)

sys.exit(0)