ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/input.py
Revision: 1.7
Committed: Fri Jul 26 14:53:30 2013 UTC (11 years, 9 months ago) by paus
Content type: text/x-python
Branch: MAIN
CVS Tags: Mit_032, HEAD
Changes since 1.6: +27 -0 lines
Log Message:
Updates

File Contents

# Content
1 #!/usr/bin/env python
2 #---------------------------------------------------------------------------------------------------
3 # Simple interface to command line DBS to prepare my crabTask input files.
4 #---------------------------------------------------------------------------------------------------
5 import os,sys,types,string,re,getopt
6
# Usage text shown for --help and whenever the command line cannot be parsed.
# Every option accepted by the parser is listed here, including --db, which
# names a catalog file that is read instead of querying DBS.
usage = (
    "Usage: input.py --dataset=<name>\n"
    " --option=[ lfn, xml ]\n"
    " [ --dbs= ]\n"
    " [ --db=<file> ]\n"
    " --help\n"
)
12
def printLine(option,nEvents,block,lfn,iJob):
    """Print one input-file record to stdout in the requested format.

    option  -- 'xml' writes a <Job .../> element; any other value writes the
               plain "block lfn nEvents" line
    nEvents -- number of events in the file (integer)
    block   -- block name the file belongs to (used by the plain format only)
    lfn     -- logical file name of the file
    iJob    -- job number written as JobID (used by the xml format only)
    """
    # print() with one parenthesised argument behaves identically on Python 2 and 3
    if option == 'xml':
        print(' <Job MaxEvents="%d" InputFiles="%s" SkipEvents="0" JobID="%d" > </Job>'
              % (nEvents, lfn, iJob))
    else:
        print("%s %s %d" % (block, lfn, nEvents))
19
20
# Define the valid options which can be specified and check out the command line
# (a trailing '=' marks an option that takes an argument)
valid = ['db=','dbs=','dataset=','option=','help']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError as ex:
    # unknown option or missing argument: show the usage plus the specific complaint
    print(usage)
    print(str(ex))
    sys.exit(1)
29
30 # --------------------------------------------------------------------------------------------------
31 # Get all parameters for the production
32 # --------------------------------------------------------------------------------------------------
# Set defaults for each option
db = None          # catalog file to read instead of querying DBS
dbs = None         # DBS selector: 'none', '', an http:// url or a private catalog name
dataset = None     # dataset name (required)
option = 'lfn'     # output format: 'lfn' or 'xml'
private = False    # set further below for private productions

# Read new values from the command line
for opt, arg in opts:
    if opt == "--help":
        print(usage)
        sys.exit(0)
    elif opt == "--db":
        db = arg
    elif opt == "--dbs":
        dbs = arg
    elif opt == "--dataset":
        dataset = arg
    elif opt == "--option":
        option = arg

# Deal with obvious problems
if dataset is None:
    raise RuntimeError("--dataset= required parameter not provided.")
58
# is it a private production: the first path component of the dataset is 'mc'
# (a dataset name without any '/' is simply not private instead of an IndexError)
f = dataset.split('/')
if len(f) > 1 and f[1] == "mc":
    private = True
64
65 #---------------------------------------------------------------------------------------------------
66 # main
67 #---------------------------------------------------------------------------------------------------
if private:
    # Private production: list the files in the user area and count the events of each one
    # with catalogFile.sh; one record is printed per file, then the script stops.
    lfn = '/store/user/paus' + dataset
    directory = '/mnt/hadoop/cms/store/user/paus' + dataset
    cmd = 'list ' + directory
    for line in os.popen(cmd).readlines():
        # strip only the newline so a last line without one is not truncated
        line = line.rstrip('\n')
        f = line.split(' ')
        # the size is not used further, but converting it still rejects
        # lines that do not start with a number
        size = int(f[0])
        fileName = f[1]

        cmdCount = 'catalogFile.sh /mnt/hadoop/cms' + lfn + \
                   '/' + fileName + ' 2>/dev/null|tail -1|cut -d\' \' -f5'
        # stays 0 when the command produces no output; otherwise the last output line wins
        nEvts = 0
        for tmp in os.popen(cmdCount).readlines():
            nEvts = tmp.rstrip('\n')

        print('%s#00000000-0000-0000-0000-000000000000 %s/%s %s'%(dataset,lfn,fileName,nEvts))
    sys.exit()
87
if not db:
    # No catalog file given: look up the blocks of the dataset, then the files of each block.
    # The commands below need a --dbs value; without one the script used to die with an
    # obscure TypeError inside re.search(), so fail with a clear message instead.
    if dbs is None:
        raise RuntimeError("--dbs= required parameter not provided.")

    # find relevant blocks
    if dbs == 'none':
        cmd = 'dascli.py --query="block=' + dataset + '*" --limit=999999 --format=blocks'
    elif dbs == '':
        cmd = 'dbs search --query="find block where dataset=*' + dataset + '"'
    elif re.search('http://',dbs):
        cmd = 'dbs search --url=' + dbs + ' --query="find block where dataset=*' + dataset + '"'
    else:
        cmd = 'echo ' + dataset + '#00000000-0000-0000-0000-000000000000'

    # keep only the lines containing '#', sorted (raw string: '\#' is not a valid escape).
    # Do not print debugging output here, the records are written to stdout as well.
    cmd += r"| grep \# | sort"

    iJob = 1
    if option == 'xml':
        print('<arguments>')
    blocks = [line.rstrip('\n') for line in os.popen(cmd).readlines()]
    for block in blocks:

        # find the files (and their number of events) of this block
        if dbs == 'none':
            cmd = 'dascli.py --query="file block=' + block + '" --limit=999999 --format=files'
        elif dbs == '':
            cmd = 'dbs search --query="find file,file.numevents where block=' + block + '"'
        elif re.search('http://',dbs):
            cmd = 'dbs search --url=' + dbs + \
                  ' --query="find file,file.numevents where block=' + block + '"'
        else:
            cmd = 'cat /home/cmsprod/catalog/t2mit/private/' + dbs + dataset \
                  + "/Files | sed 's@XX-CATALOG-XX@@' | sed 's@root://xrootd1.cmsaf.mit.edu/@@'"

        # keep only the lines containing 'store', sorted
        cmd += "| grep store | sort"
        for line in os.popen(cmd).readlines():
            # split() without arguments also discards the trailing newline
            f = line.split()
            lfn = f[0]
            nEvents = int(f[1])
            # files without events are not listed
            # NOTE(review): the job counter is assumed to advance only for listed files;
            # the nesting of the increment should be confirmed against the repository copy.
            if nEvents != 0:
                printLine(option,nEvents,block,lfn,iJob)
                iJob = iJob + 1
    if option == 'xml':
        print('</arguments>')
139
if db:
    # A catalog file was given: each of its lines reads "block lfn nEvents"
    cmd = 'cat ' + db

    iJob = 1
    if option == 'xml':
        print('<arguments>')
    for line in os.popen(cmd).readlines():
        # split() without arguments already discards the newline; chopping the last
        # character first would eat a digit of the event count when the file does
        # not end with a newline
        f = line.split()
        if not f:
            # tolerate blank lines in the catalog file
            continue
        block = f[0]
        lfn = f[1]
        nEvents = int(f[2])

        # files without events are not listed
        # NOTE(review): the job counter is assumed to advance only for listed files;
        # the nesting of the increment should be confirmed against the repository copy.
        if nEvents != 0:
            printLine(option,nEvents,block,lfn,iJob)
            iJob = iJob + 1

    if option == 'xml':
        print('</arguments>')