ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitProd/Processing/bin/input.py
Revision: 1.7
Committed: Fri Jul 26 14:53:30 2013 UTC (11 years, 9 months ago) by paus
Content type: text/x-python
Branch: MAIN
CVS Tags: Mit_032, HEAD
Changes since 1.6: +27 -0 lines
Log Message:
Updates

File Contents

# User Rev Content
1 paus 1.2 #!/usr/bin/env python
2     #---------------------------------------------------------------------------------------------------
3     # Simple interface to command line DBS to prepare my crabTask input files.
4     #---------------------------------------------------------------------------------------------------
5 paus 1.5 import os,sys,types,string,re,getopt
6 paus 1.2
# Define string to explain usage of the script. Every option accepted by the getopt list of valid
# options is mentioned here, including --db= (a file that is read instead of querying for blocks).
usage = "Usage: input.py --dataset=<name>\n"
usage += " --option=[ lfn, xml ]\n"
usage += " [ --dbs= ]\n"
usage += " [ --db=<file> ]\n"
usage += " --help\n"
12    
def printLine(option,nEvents,block,lfn,iJob):
    """Print one input file record to stdout in the requested format.

    option  -- 'xml' selects the <Job ...> element form, any other value selects the plain
               'block lfn nEvents' text form
    nEvents -- number of events in the file (formatted with %d)
    block   -- block name, only used in the plain text form
    lfn     -- file name, written as InputFiles (xml) or as the second column (text)
    iJob    -- job number, only used as JobID in the xml form
    """
    # print(...) with one parenthesized string behaves the same on Python 2 and Python 3
    if option == 'xml':
        print(' <Job MaxEvents="%d'%nEvents + '" InputFiles="' + lfn
              + '" SkipEvents="0" JobID="%d'%iJob + '" > </Job>')
    else:
        print("%s %s %d"%(block,lfn,nEvents))
19    
20    
# Define the valid options which can be specified and check out the command line. Only long options
# are accepted (the short option string is empty); entries ending in '=' require a value.
valid = ['db=','dbs=','dataset=','option=','help']
try:
    opts, args = getopt.getopt(sys.argv[1:], "", valid)
except getopt.GetoptError as ex:
    # unknown option or missing value: show the usage and the reason, then exit with an error code
    print(usage)
    print(str(ex))
    sys.exit(1)
29    
# --------------------------------------------------------------------------------------------------
# Get all parameters for the production
# --------------------------------------------------------------------------------------------------
# Set defaults for each option
db = None
dbs = None
dataset = None
option = 'lfn'
private = False

# Read new values from the command line (an option string can only match one of the cases)
for opt, arg in opts:
    if opt == "--help":
        print(usage)
        sys.exit(0)
    elif opt == "--db":
        db = arg
    elif opt == "--dbs":
        dbs = arg
    elif opt == "--dataset":
        dataset = arg
    elif opt == "--option":
        option = arg

# Deal with obvious problems
if dataset is None:
    raise RuntimeError("--dataset= required parameter not provided.")

# is it a private production: the first path component of the dataset name is 'mc'
# (the length check avoids an IndexError for dataset names which contain no '/')
parts = dataset.split('/')
if len(parts) > 1 and parts[1] == "mc":
    private = True
    #print(' Attention -- private dataset identified.')
64    
#---------------------------------------------------------------------------------------------------
# main
#---------------------------------------------------------------------------------------------------
# Private production: list the files in the storage directory of the dataset, determine the number
# of events of each file with catalogFile.sh and print one line per file using a dummy block id.
# The script exits at the end of this branch.
if private:
    lfn = '/store/user/paus' + dataset
    storageDir = '/mnt/hadoop/cms/store/user/paus' + dataset
    cmd = 'list ' + storageDir
    for line in os.popen(cmd).readlines():
        # strip only the newline (line[:-1] would cut a character from an unterminated last line)
        line = line.rstrip('\n')
        if not line.strip():
            continue                      # blank lines carry no file information
        fields = line.split(' ')
        int(fields[0])                    # first column has to be an integer (presumably the size)
        fileName = fields[1]

        cmdCount = 'catalogFile.sh /mnt/hadoop/cms' + lfn + \
                   '/' + fileName + " 2>/dev/null|tail -1|cut -d' ' -f5"
        #print(' COUNT: ' + cmdCount)
        # stays at 0 when the count command produces no output, otherwise the last line is used
        nEvts = 0
        for tmp in os.popen(cmdCount).readlines():
            nEvts = tmp.rstrip('\n')

        print('%s#00000000-0000-0000-0000-000000000000 %s/%s %s'%(dataset,lfn,fileName,nEvts))
    sys.exit()
87    
# No --db file given: find the blocks of the dataset and then the files of each block through an
# external command which is selected by the value of --dbs:
#   'none'           -> dascli.py
#   ''               -> dbs search (no url)
#   contains http:// -> dbs search with --url=<dbs>
#   anything else    -> private catalog file below /home/cmsprod/catalog/t2mit/private/<dbs>
if not db:
    # --dbs not given at all: treat it like an empty --dbs= (None would raise a TypeError in the
    # http:// test below). NOTE(review): confirm that the default dbs search is the intended fallback
    if dbs is None:
        dbs = ''

    # find relevant blocks
    if dbs == 'none':
        cmd = 'dascli.py --query="block=' + dataset + '*" --limit=999999 --format=blocks'
    elif dbs == '':
        cmd = 'dbs search --query="find block where dataset=*' + dataset + '"'
    elif 'http://' in dbs:
        cmd = 'dbs search --url=' + dbs + ' --query="find block where dataset=*' + dataset + '"'
    else:
        # private catalog: there is only one dummy block
        cmd = 'echo ' + dataset + '#00000000-0000-0000-0000-000000000000'

    #print("CMD " + cmd)
    # keep only lines containing a '#' (the backslash is passed on to the shell unchanged)
    cmd += "| grep \\# | sort"
    # never print #print("cmd: " + cmd)

    iJob = 1
    if option == 'xml':
        print('<arguments>')
    # strip only the newline (line[:-1] would cut a character from an unterminated last line)
    blocks = [ line.rstrip('\n') for line in os.popen(cmd).readlines() ]
    for block in blocks:
        #print(' BLOCK: ' + block)

        if dbs == 'none':
            cmd = 'dascli.py --query="file block=' + block + '" --limit=999999 --format=files'
        elif dbs == '':
            cmd = 'dbs search --query="find file,file.numevents where block=' + block + '"'
        elif 'http://' in dbs:
            cmd = 'dbs search --url=' + dbs + \
                  ' --query="find file,file.numevents where block=' + block + '"'
        else:
            cmd = 'cat /home/cmsprod/catalog/t2mit/private/' + dbs + dataset \
                  + "/Files | sed 's@XX-CATALOG-XX@@' | sed 's@root://xrootd1.cmsaf.mit.edu/@@'"

        #print("CMD " + cmd)
        cmd += "| grep store | sort"
        for line in os.popen(cmd).readlines():
            #print("LINE >" + line)
            # first column is the file name, second column the number of events
            fields = line.split()
            lfn = fields[0]
            nEvents = int(fields[1])
            # files without events are not printed and do not consume a job number
            if nEvents != 0:
                printLine(option,nEvents,block,lfn,iJob)
                iJob = iJob + 1
    if option == 'xml':
        print('</arguments>')
139    
# A --db file was given: read the 'block lfn nEvents' records from that file (through cat) instead
# of querying for blocks and files, and print every record which has a non-zero number of events.
if db:
    cmd = 'cat ' + db

    iJob = 1
    if option == 'xml':
        print('<arguments>')
    for line in os.popen(cmd).readlines():
        fields = line.split()
        if not fields:
            continue                      # skip blank lines instead of failing with an IndexError

        block = fields[0]
        lfn = fields[1]
        nEvents = int(fields[2])

        # files without events are not printed and do not consume a job number
        if nEvents != 0:
            printLine(option,nEvents,block,lfn,iJob)
            iJob = iJob + 1

    if option == 'xml':
        print('</arguments>')
148    
149     f = line.split()
150     block = f[0]
151     lfn = f[1]
152     nEvents = int(f[2])
153    
154     f = lfn.split("/")
155     file = f[-1]
156    
157     if nEvents != 0:
158     printLine(option,nEvents,block,lfn,iJob)
159     iJob = iJob + 1
160    
161     if option == 'xml':
162     print '</arguments>'