ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osusub
Revision: 1.17
Committed: Wed May 1 19:47:56 2013 UTC (12 years ago) by ahart
Branch: MAIN
CVS Tags: V02-01-00, V01-01-00, V01-00-01, V01-00-00
Changes since 1.16: +5 -5 lines
Log Message:
The string concatenation operator in Perl is ".", not "+". Rookie mistake.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Mysql;
5 use File::Copy;
6 use Getopt::Long;
7 use POSIX;
8
# Forward declarations of the subroutines defined below the main script body.
sub printHelp;
sub outputPset;
sub outputRunList;
sub getLocation;
sub outputCondor;

# Database handle shared with getLocation, which performs the dataset lookup.
# The result of the connection attempt is not checked here because a LOCATION
# that exists on disk is processed without ever querying the database.
our $db = Mysql->connect ("cmshead", "ntuple", "osuT3User");

my %opt;
Getopt::Long::Configure ("bundling");
# GetOptions returns false for unknown or malformed options; treat that as a
# usage error instead of silently carrying on.
printHelp () if !GetOptions (\%opt, "label|l=s", "maxEvents|m=s", "help|h");
printHelp () if $opt{"help"};

# Either "CONFIG DIRECTORY NJOBS" or "DATASET CONFIG DIRECTORY NJOBS".
my $argc = @ARGV;
printHelp () if $argc != 3 && $argc != 4;
my @positional = @ARGV;
my $dataset = "";
$dataset = shift @positional if $argc == 4;
my ($config, $directory, $nJobs) = @positional;

# NJOBS is used as a divisor below, so it has to be a positive integer.
if ($nJobs !~ /^[0-9]+$/ || $nJobs < 1)
{
  print "NJOBS must be a positive integer, not \"$nJobs\"!\n";
  exit 1;
}
if (defined $opt{"maxEvents"} && $opt{"maxEvents"} !~ /^-?[0-9]+$/)
{
  print "The argument of --maxEvents must be an integer, not \"$opt{'maxEvents'}\"!\n";
  exit 1;
}
if (!(-e $config))
{
  print "\"$config\" does not exist!\n";
  exit 1;
}
if (-e $directory)
{
  print "Directory \"$directory\" already exists!\n";
  print "Please delete it or specify another working directory.\n";
  exit 1;
}
mkdir ($directory) or die "Could not create directory \"$directory\": $!\n";

# Number of input files; 0 when no dataset or location was given.
my $nFiles = outputRunList ($dataset, $directory);

# Divide the files evenly: with ceil (nFiles / nJobs) files per job, fewer jobs
# than requested may be enough to cover every file.
my $realNJobs = $nJobs;
$realNJobs = ceil ($nFiles / ceil ($nFiles / $realNJobs)) if $nFiles && $nFiles > 0;

# -1 means "run over all events"; a missing, zero or negative --maxEvents
# leaves it at that value.
my $eventsPerJob = -1;
$eventsPerJob = ceil ($opt{"maxEvents"} / $realNJobs) if $opt{"maxEvents"} && $opt{"maxEvents"} >= 0;
my $realMaxEvents = $eventsPerJob * $realNJobs;

outputPset ($directory, $dataset, $opt{"maxEvents"}, $eventsPerJob);
outputCondor ("$ENV{'CMSSW_BASE'}/src/OSUT3Analysis/DBTools/data/condor.sub", $directory, $realNJobs, $dataset, $opt{"label"});
copy ($config, "$directory/userConfig_cfg.py")
  or die "Could not copy \"$config\" to \"$directory/userConfig_cfg.py\": $!\n";
chdir ($directory) or die "Could not change to directory \"$directory\": $!\n";

print "Submitting $realNJobs jobs to run on $realMaxEvents events in $nFiles files.\n" if $realMaxEvents >= 0;
print "Submitting $realNJobs jobs to run on all events in $nFiles files.\n" if $realMaxEvents < 0;

# The list form runs condor_submit directly, without going through a shell.
if (system ("condor_submit", "condor.sub") != 0)
{
  print "condor_submit failed!\n";
  exit 1;
}
67
# outputPset: writes "config_cfg.py" into the working directory.  The generated
# Python imports the user's configuration (userConfig_cfg), appends the job
# number to the TFileService output file name and, depending on the arguments,
# overrides the input file list and the maximum number of events.
#
# Arguments:
#   $workingDir   - directory in which config_cfg.py is created
#   $dataset      - if true, the source file names are taken from osusub.runList
#   $nEvents      - accepted for interface compatibility; not used
#   $eventsPerJob - value written to maxEvents.input
#
# Dies if the file cannot be opened or written.
sub outputPset
{
  my ($workingDir, $dataset, $nEvents, $eventsPerJob) = @_;

  my $psetName = "$workingDir/config_cfg.py";
  open (my $pset, ">", $psetName)
    or die "Could not open \"$psetName\" for writing: $!\n";

  # Non-interpolating here-document so that the Python regular expression is
  # written exactly as it appears below.
  print $pset <<'END_OF_HEADER';
import FWCore.ParameterSet.Config as cms
import OSUT3Analysis.DBTools.osusub_cfg as osusub
import re
import userConfig_cfg as pset

fileName = pset.process.TFileService.fileName
fileName = fileName.pythonValue ()
fileName = fileName[1:(len (fileName) - 1)]
fileName = re.sub (r'^(.*)\.([^\.]*)$', r'\1_' + str (osusub.jobNumber) + r'.\2', fileName)
pset.process.TFileService.fileName = fileName

END_OF_HEADER

  print $pset "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n" if $dataset;

  # With a dataset the event limit is always written, negative values
  # included; without one only a non-negative limit overrides the user's
  # configuration.
  print $pset "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)\n"
    if $dataset || $eventsPerJob >= 0;
  print $pset "process = pset.process\n";

  # Buffered write errors only surface when the handle is closed.
  close ($pset) or die "Could not write \"$psetName\": $!\n";
}
99
# outputRunList: writes "runList.py", a Python list named runList holding one
# 'file:LOCATION/NAME' entry per directory entry of the dataset's location,
# into the working directory.  If the database lookup supplied a non-negative
# cross section, it is also written to "crossSectionInPicobarn.txt".
#
# Arguments:
#   $dataset    - an existing path on disk, or a dataset name that is resolved
#                 through getLocation; an empty value means "no input files"
#   $workingDir - directory in which the output files are created
#
# Returns the number of input files: 0 without a dataset, the number of
# directory entries for a path on disk, or the database's file count for a
# registered dataset.
#
# Exits if the user declines a confirmation prompt or the location is missing;
# dies if a directory or file cannot be opened.
sub outputRunList
{
  my ($dataset, $workingDir) = @_;

  return 0 if !$dataset;

  my ($location, $nFiles, $status, $crossSection);
  my $isLocation = -e $dataset ? 1 : 0;
  if ($isLocation)
  {
    $location = $dataset;
  }
  else
  {
    ($location, $nFiles, $status, $crossSection) = getLocation ($dataset);
    confirmOrExit ("This dataset is not marked as present on the Tier 3!") if $status ne "present";
    if (!(-e $location))
    {
      print "The database does not know where this dataset is!\n";
      exit 1;
    }
  }

  # A location that is not a readable directory is reported here instead of
  # silently producing an empty list and a negative file count.
  opendir (my $dir, $location)
    or die "Could not read directory \"$location\": $!\n";
  my @files = grep { $_ ne "." && $_ ne ".." } readdir ($dir);
  closedir ($dir);

  if ($isLocation)
  {
    $nFiles = @files;
  }
  elsif (@files != $nFiles)
  {
    confirmOrExit ("Number of files does not match database entry!");
  }

  my $runListName = "$workingDir/runList.py";
  open (my $runList, ">", $runListName)
    or die "Could not open \"$runListName\" for writing: $!\n";
  print $runList "runList = [\n";
  # join places commas only between entries, so the last entry never carries
  # one regardless of the order in which readdir returned "." and "..".
  print $runList join (",\n", map { "'file:$location/$_'" } @files);
  print $runList "\n" if @files;
  print $runList "]";
  close ($runList) or die "Could not write \"$runListName\": $!\n";

  if ($crossSection && $crossSection >= 0.0)
  {
    my $crossSectionName = "$workingDir/crossSectionInPicobarn.txt";
    open (my $crossSectionFile, ">", $crossSectionName)
      or die "Could not open \"$crossSectionName\" for writing: $!\n";
    print $crossSectionFile "$crossSection\n";
    close ($crossSectionFile) or die "Could not write \"$crossSectionName\": $!\n";
  }

  return $nFiles;
}

# confirmOrExit: prints the given warning, asks "Continue anyway?" and exits
# unless the answer read from standard input is "y" or "Y".
sub confirmOrExit
{
  my $warning = shift;

  print "$warning\n";
  print "Continue anyway? (y/N): ";
  my $response = <STDIN>;
  $response = "" if !defined $response;   # end of input counts as "no"
  $response =~ s/\n//g;
  exit if lc ($response) ne "y";
}
170
# getLocation: looks up a dataset in the "ntuple" table through the global
# database handle $db.  "*" in the name acts as a wildcard and the name may
# match anywhere inside the stored dataset name.  If several datasets match,
# they are listed and the user is asked to pick one.
#
# Arguments:
#   $dataset - dataset name or pattern
#
# Returns the list (location, nFiles, status, crossSection), where crossSection
# is higherOrderCrossSectionInPicobarn if that column holds a true value and
# crossSectionInPicobarn otherwise.
#
# Exits if the database is unavailable, nothing matches or the selection is
# invalid.
sub getLocation
{
  my $dataset = shift;

  if (!$db)
  {
    print "Could not connect to the Tier 3 ntuple database!\n";
    exit 1;
  }

  (my $queryDataset = $dataset) =~ s/\*/%/g;
  # quote () escapes the pattern and adds the surrounding quotation marks, so
  # a name containing "'" can no longer break out of the SQL string.
  my $quotedPattern = $db->quote ("%$queryDataset%");
  my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn,higherOrderCrossSectionInPicobarn from ntuple where dataset like $quotedPattern order by creationTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);
  if (!$results)
  {
    print "The query of the Tier 3 ntuple database failed!\n";
    exit 1;
  }

  my $nRows = $results->numrows ();
  if ($nRows == 0)
  {
    print "Dataset does not exist on the Tier 3!\n";
    exit 1;
  }

  # Reduces a result row to (location, nFiles, status, crossSection).
  my $summarize = sub
  {
    my @row = @_;
    return ($row[3], $row[4], $row[5], $row[7] ? $row[7] : $row[6]);
  };

  return $summarize->($results->fetchrow ()) if $nRows == 1;

  my %map;
  print "Found multiple datasets matching\n";
  print "\"$dataset\":\n";
  for my $i (1 .. $nRows)
  {
    my @row = $results->fetchrow ();
    $map{"$i"} = [$summarize->(@row)];
    # The name is passed as an argument so that a "%" inside it is not taken
    # for a format directive.
    printf "(%2d) %s\n", $i, $row[0];
    print " created by $row[1] on $row[2]\n";
  }
  print "\nWhich dataset would you like to use?: ";
  my $response = <STDIN>;
  $response = "" if !defined $response;   # end of input selects nothing
  $response =~ s/[ \t\n]//g;
  if (!(exists $map{$response}))
  {
    print "Your selection \"$response\" was not a valid option! Quitting.\n";
    exit 1;
  }

  return @{$map{$response}};
}
216
# outputCondor: writes the Condor submit description "condor.sub" into the
# working directory.  If the template file exists, its placeholders $cmsRun,
# $nJobs, $dataset and $label are substituted; otherwise a built-in description
# is used.
#
# Arguments:
#   $condorFileName - path of the template submit description
#   $workingDir     - directory in which condor.sub is created
#   $nJobs          - number of jobs to queue
#   $dataset        - dataset name passed to the jobs; "NULL" if false
#   $label          - dataset label passed to the jobs; "NULL" if false
#
# Dies if the template cannot be read or condor.sub cannot be written.
sub outputCondor
{
  my ($condorFileName, $workingDir, $nJobs, $dataset, $label) = @_;

  # The output of "which" ends in a newline, which must not end up in the
  # middle of the submit description.
  my $cmsRun = `which cmsRun`;
  $cmsRun = "" if !defined $cmsRun;
  chomp $cmsRun;
  warn "Warning: cmsRun was not found in PATH!\n" if $cmsRun eq "";

  my %value = (
    cmsRun  => $cmsRun,
    nJobs   => $nJobs,
    dataset => $dataset ? $dataset : "NULL",
    label   => $label ? $label : "NULL",
  );

  my $condorFile;
  if (-e $condorFileName)
  {
    open (my $template, "<", $condorFileName)
      or die "Could not open \"$condorFileName\" for reading: $!\n";
    $condorFile = do { local $/; <$template> };
    close ($template);
    $condorFile = "" if !defined $condorFile;
    # A single pass, so that text inserted for one placeholder is never
    # scanned again for other placeholders.
    $condorFile =~ s/\$(cmsRun|nJobs|dataset|label)/$value{$1}/g;
  }
  else
  {
    $condorFile = join ("",
      "Executable = $value{'cmsRun'}\n",
      "Universe = vanilla\n",
      "Getenv = True\n",
      "Arguments = config_cfg.py True $value{'nJobs'} \$(Process) $value{'dataset'} $value{'label'}\n",
      "\n",
      "Output = condor_\$(Process).out\n",
      "Error = condor_\$(Process).err\n",
      "Log = condor_\$(Process).log\n",
      "\n",
      "+IsLocalJob = true\n",
      "Rank = TARGET.IsLocalSlot\n",
      "\n",
      "Queue $value{'nJobs'}\n",
    );
  }

  my $subName = "$workingDir/condor.sub";
  open (my $sub, ">", $subName)
    or die "Could not open \"$subName\" for writing: $!\n";
  print $sub $condorFile;
  close ($sub) or die "Could not write \"$subName\": $!\n";
}
270
# printHelp: prints the usage message, the option summary and a description of
# the positional arguments to standard output, then exits.
sub printHelp
{
  # Keep only the part of the invoked path that follows the last "/".
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  # Option column and description column of the option summary.
  my @optionRows = (
    [" -h, --help", "print this help message"],
    [" -l, --label LABEL", "give the dataset a short label"],
    [" -m, --maxEvents N", "only run over N events in the dataset; default is"],
    [" ", "to run over all events"],
  );

  my @description = (
    "The optional first argument must be either a DATASET registered in the Tier 3",
    "ntuple database or a LOCATION which exists on disk. CONFIG must be a valid",
    "CMSSW python configuration which can be used with cmsRun. DIRECTORY is a",
    "working directory that is created and in which all output, both from the CMSSW",
    "jobs and from Condor, is placed. Finally, NJOBS is the number of Condor jobs",
    "that will be created.",
  );

  print "Usage: $exeName [OPTION]... [DATASET | LOCATION] CONFIG DIRECTORY NJOBS\n";
  print "Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor.\n";
  print "\n";
  foreach my $row (@optionRows)
  {
    printf "%-29s%s\n", $row->[0], $row->[1];
  }
  print "\n";
  foreach my $line (@description)
  {
    print "$line\n";
  }

  exit;
}
293 }