1 |
#!/usr/bin/env python
|
2 |
#---------------------------------------------------------------------------------------------------
|
3 |
# Simple interface to command line DBS to prepare my crabTask input files.
|
4 |
#---------------------------------------------------------------------------------------------------
|
5 |
import os,sys,types,string,re,getopt
|
6 |
|
7 |
# Define string to explain usage of the script. Every long option accepted by the getopt call in
# this file is listed, including --db (a text file that is read instead of querying DBS/DAS).
usage = "Usage: input.py --dataset=<name>\n"
usage += " --option=[ lfn, xml ]\n"
usage += " [ --dbs= ]\n"
usage += " [ --db=<file> ]\n"
usage += " --help\n"
|
12 |
|
13 |
def printLine(option,nEvents,block,lfn,iJob):
    """Print one input file record to stdout.

    option  -- output format: 'xml' writes a <Job .../> element, anything else a plain text line
    nEvents -- number of events in the file (formatted with %d)
    block   -- block name the file belongs to (only used in the plain text format)
    lfn     -- logical file name of the file
    iJob    -- job number (only used in the xml format, as JobID)
    """
    if option == 'xml':
        record = ' <Job MaxEvents="%d" InputFiles="%s" SkipEvents="0" JobID="%d" > </Job>' \
                 % (nEvents, lfn, iJob)
    else:
        record = "%s %s %d" % (block, lfn, nEvents)
    # a single parenthesized argument prints identically under Python 2 and Python 3
    print(record)
|
19 |
|
20 |
|
21 |
# Define the valid options which can be specified and check out the command line
# (long options only; a trailing '=' marks an option that takes a value)
valid = ['db=','dbs=','dataset=','option=','help']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError as ex:
    # unknown option or missing value: show the usage text and the parser message, then fail.
    # 'except ... as' and print(...) with one argument work on Python 2.6+ and Python 3.
    print(usage)
    print(str(ex))
    sys.exit(1)
|
29 |
|
30 |
# --------------------------------------------------------------------------------------------------
|
31 |
# Get all parameters for the production
|
32 |
# --------------------------------------------------------------------------------------------------
|
33 |
# Set defaults for each option
db = None          # --db: file read with 'cat' instead of querying DBS/DAS
dbs = None         # --dbs: selects the lookup ('none', '', an http:// url, or a catalog name)
dataset = None     # --dataset: required, checked further down
option = 'lfn'     # --option: output format, 'lfn' (plain text) or 'xml'
private = False    # switched on further down for datasets recognized as private productions

# Read new values from the command line
for opt, arg in opts:
    if opt == "--help":
        # single-argument print(...) behaves the same on Python 2 and Python 3
        print(usage)
        sys.exit(0)
    elif opt == "--db":
        db = arg
    elif opt == "--dbs":
        dbs = arg
    elif opt == "--dataset":
        dataset = arg
    elif opt == "--option":
        option = arg
|
53 |
|
54 |
# Deal with obvious problems: without a dataset there is nothing to look up
if dataset is None:
    cmd = "--dataset= required parameter not provided."
    # call form of raise is valid on both Python 2 and Python 3
    raise RuntimeError(cmd)
|
58 |
|
59 |
# is it a private production: the first path component of the dataset name is 'mc' (e.g. '/mc/...')
parts = dataset.split('/')
# length check: a dataset name without any '/' must not die with an IndexError here
if len(parts) > 1 and parts[1] == "mc":
    private = True
    #print ' Attention -- private dataset identified.'
|
64 |
|
65 |
#---------------------------------------------------------------------------------------------------
|
66 |
# main
|
67 |
#---------------------------------------------------------------------------------------------------
|
68 |
if private:
    # Private dataset: list the files from the storage path and exit before any DBS/DAS query.
    lfn = '/store/user/paus' + dataset
    hadoopDir = '/mnt/hadoop/cms/store/user/paus' + dataset
    # 'list' is an external command; its output is presumably "<size> <file name>" per line
    # -- TODO confirm against the tool
    cmd = 'list ' + hadoopDir
    for line in os.popen(cmd).readlines():
        # strip only the newline, so a final line without one does not lose a real character
        fields = line.rstrip('\n').split(' ')
        size = int(fields[0])      # value unused; the conversion still rejects malformed lines
        fileName = fields[1]

        # the event count is taken from field 5 of the last line that catalogFile.sh prints
        cmdCount = 'catalogFile.sh /mnt/hadoop/cms' + lfn + \
                   '/' + fileName + ' 2>/dev/null|tail -1|cut -d\' \' -f5'
        #print ' COUNT: ' + cmdCount
        nEvts = 0                  # stays 0 when the command prints nothing
        for tmp in os.popen(cmdCount).readlines():
            nEvts = tmp.rstrip('\n')

        # private files all get the same dummy block id
        print('%s#00000000-0000-0000-0000-000000000000 %s/%s %s'%(dataset,lfn,fileName,nEvts))
    sys.exit()
|
87 |
|
88 |
if not db:
    # No --db file given: find the blocks of the dataset, then the files of every block.
    #
    # --dbs may be missing altogether (None). It is treated like an empty --dbs= (plain
    # 'dbs search'); before, None reached re.search() and died with a TypeError.
    if dbs is None:
        dbsSource = ''
    else:
        dbsSource = dbs
    # plain substring test, equivalent to the former re.search('http://',dbs)
    isUrl = 'http://' in dbsSource

    # find relevant blocks
    if dbsSource == 'none':
        cmd = 'dascli.py --query="block=' + dataset + '*" --limit=999999 --format=blocks'
    elif dbsSource == '':
        cmd = 'dbs search --query="find block where dataset=*' + dataset + '"'
    elif isUrl:
        cmd = 'dbs search --url=' + dbsSource + ' --query="find block where dataset=*' \
              + dataset + '"'
    else:
        # any other value: no lookup, a single dummy block is made up for the dataset
        cmd = 'echo ' + dataset + '#00000000-0000-0000-0000-000000000000'

    #print "CMD " + cmd
    # keep only lines containing '#'; same string as before, the backslash is now written
    # explicitly
    cmd += "| grep \\# | sort"
    # never print #print "cmd: " + cmd

    iJob = 1
    if option == 'xml':
        print('<arguments>')
    blocks = [line.rstrip('\n') for line in os.popen(cmd).readlines()]
    for block in blocks:
        #print ' BLOCK: ' + block

        if dbsSource == 'none':
            cmd = 'dascli.py --query="file block=' + block + '" --limit=999999 --format=files'
        elif dbsSource == '':
            cmd = 'dbs search --query="find file,file.numevents where block=' + block + '"'
        elif isUrl:
            cmd = 'dbs search --url=' + dbsSource + \
                  ' --query="find file,file.numevents where block=' + block + '"'
        else:
            # private catalog: read the Files list and strip the catalog/xrootd prefixes
            cmd = 'cat /home/cmsprod/catalog/t2mit/private/' + dbsSource + dataset \
                  + '/Files | sed \'s@XX-CATALOG-XX@@\' | sed \'s@root://xrootd1.cmsaf.mit.edu/@@\''

        #print "CMD " + cmd
        cmd += "| grep store | sort"
        for line in os.popen(cmd).readlines():
            #print "LINE >" + line
            # split() already ignores the trailing newline; nothing is chopped off, so the
            # event count of a final line without a newline is no longer truncated
            fields = line.split()
            lfn = fields[0]
            nEvents = int(fields[1])
            # files without events are skipped and do not use up a job number
            if nEvents != 0:
                printLine(option,nEvents,block,lfn,iJob)
                iJob = iJob + 1
    if option == 'xml':
        print('</arguments>')
|
139 |
|
140 |
if db:
    # --db names a text file that replaces the DBS/DAS lookup; every non-empty line holds
    # three whitespace separated fields: block, lfn, number of events
    cmd = 'cat ' + db

    iJob = 1
    if option == 'xml':
        print('<arguments>')
    for line in os.popen(cmd).readlines():
        # split() ignores the trailing newline, so nothing has to be chopped off the line
        fields = line.split()
        if not fields:
            # blank line (e.g. at the end of the file): nothing to report
            continue
        block = fields[0]
        lfn = fields[1]
        nEvents = int(fields[2])

        # files without events are skipped and do not use up a job number
        if nEvents != 0:
            printLine(option,nEvents,block,lfn,iJob)
            iJob = iJob + 1

    if option == 'xml':
        print('</arguments>')
|