1 |
#!/usr/bin/env perl
|
2 |
|
3 |
use strict;
|
4 |
use Mysql;
|
5 |
use File::Copy;
|
6 |
use Getopt::Long;
|
7 |
use POSIX;
|
8 |
use Term::ANSIColor;
|
9 |
use Cwd 'abs_path';
|
10 |
|
11 |
# Unbuffer STDOUT so that prompts and progress messages show up immediately.
select ((select (STDOUT), $| = 1)[0]);

# Forward declarations of the subroutines defined further down in this file.
sub printHelp;
sub outputPset;
sub outputRunList;
sub getLocation;
sub outputCondor;

# Handle to the ntuple database; getLocation reads this package variable.
# The result is deliberately not checked here because the database is only
# needed when the first argument is a dataset name rather than a path.
our $db = Mysql->connect ("cmshead", "ntuple", "osuT3User");

my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "dataset|d=s", "label|l=s", "maxEvents|m=s", "process|p=s", "help|h");
my $argc = @ARGV;

printHelp () if $opt{"help"};
printHelp () if $argc != 3 && $argc != 4;

# The leading DATASET | LOCATION | LIST argument is optional; the remaining
# three positional arguments are always CONFIG, DIRECTORY and NJOBS.
my @positional = @ARGV;
my $dataset = "";
$dataset = shift @positional if $argc == 4;
my ($config, $directory, $nJobs) = @positional;

# NJOBS is used as a divisor below, so anything but a positive integer would
# otherwise end in an "Illegal division by zero" crash or a broken submit file.
if ($nJobs !~ m/\A[0-9]+\z/ || $nJobs < 1)
{
  print "NJOBS must be a positive integer, not \"$nJobs\"!\n";
  exit 1;
}
if (defined $opt{"maxEvents"} && $opt{"maxEvents"} !~ m/\A-?[0-9]+\z/)
{
  print "The argument of --maxEvents must be an integer, not \"$opt{'maxEvents'}\"!\n";
  exit 1;
}
if (!(-e $config))
{
  print "\"$config\" does not exist!\n";
  exit 1;
}
if (-e $directory)
{
  print "Directory \"$directory\" already exists!\n";
  print "Please delete it or specify another working directory.\n";
  exit 1;
}
mkdir $directory or die "Could not create directory \"$directory\": $!\n";

my $nFiles = outputRunList ($dataset, $directory);

# Reduce the number of jobs so that the files are spread evenly over the jobs
# and no job is left without input files.
my $realNJobs = $nJobs;
$realNJobs = ceil ($nFiles / ceil ($nFiles / $realNJobs)) if $nFiles;

# A value of -1 events per job means "run over all events".
my $eventsPerJob = -1;
$eventsPerJob = ceil ($opt{"maxEvents"} / $realNJobs) if $opt{"maxEvents"} && $opt{"maxEvents"} >= 0;
my $realMaxEvents = $eventsPerJob * $realNJobs;

outputPset ($directory, $dataset, $opt{"maxEvents"}, $eventsPerJob, $opt{"process"});

# The --dataset option only overrides the name handed to Condor; the run list
# and the configuration above are built from the positional argument.
$dataset = $opt{"dataset"} if $opt{"dataset"};
outputCondor ("$ENV{'CMSSW_BASE'}/src/OSUT3Analysis/DBTools/data/condor.sub", $directory, $realNJobs, $dataset, $opt{"label"});

copy ($config, "$directory/userConfig_cfg.py")
  or die "Could not copy \"$config\" to \"$directory/userConfig_cfg.py\": $!\n";
chdir $directory or die "Could not change to directory \"$directory\": $!\n";

print "Submitting $realNJobs jobs to run on $realMaxEvents events in $nFiles files.\n" if $realMaxEvents >= 0;
print "Submitting $realNJobs jobs to run on all events in $nFiles files.\n" if $realMaxEvents < 0;

system ("LD_LIBRARY_PATH=/usr/lib64/condor:\$LD_LIBRARY_PATH condor_submit condor.sub") == 0
  or die "condor_submit failed (wait status $?)\n";
|
72 |
|
73 |
# Writes the wrapper configuration "config_cfg.py" into the working directory.
#
# The generated Python imports the user's configuration (userConfig_cfg),
# appends the job number to the TFileService output file name and, depending
# on the arguments, overrides the input files and the maximum number of
# events.
#
# Arguments (positional):
#   workingDir    directory in which config_cfg.py is created
#   dataset       if true, the source file names are taken from osusub.runList
#   nEvents       accepted for call compatibility; not used
#   eventsPerJob  value written to maxEvents.input; a negative value is only
#                 written when a dataset is given
#   process       optional suffix for the process name; every character that
#                 is not alphanumeric is removed before use
#
# Dies if config_cfg.py cannot be opened or closed.
sub outputPset
{
  my ($workingDir, $dataset, undef, $eventsPerJob, $process) = @_;

  $process = "" if !defined $process;
  $process =~ s/[^[:alnum:]]//g;

  my $psetFileName = "$workingDir/config_cfg.py";
  open (my $pset, ">", $psetFileName)
    or die "Could not open \"$psetFileName\" for writing: $!\n";

  print $pset "import FWCore.ParameterSet.Config as cms\n";
  print $pset "import OSUT3Analysis.DBTools.osusub_cfg as osusub\n";
  print $pset "import re\n";
  print $pset "import userConfig_cfg as pset\n";
  print $pset "\n";

  # Insert "_<jobNumber>" in front of the output file's extension.
  print $pset "fileName = pset.process.TFileService.fileName\n";
  print $pset "fileName = fileName.pythonValue ()\n";
  print $pset "fileName = fileName[1:(len (fileName) - 1)]\n";
  print $pset "fileName = re.sub (r'^(.*)\\.([^\\.]*)\$', r'\\1_' + str (osusub.jobNumber) + r'.\\2', fileName)\n";
  print $pset "pset.process.TFileService.fileName = fileName\n";
  print $pset "\n";

  if ($dataset)
  {
    print $pset "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n";
    print $pset "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)\n" if $eventsPerJob < 0;
  }
  print $pset "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)\n" if $eventsPerJob >= 0;
  print $pset "process = pset.process\n";
  print $pset "process.setName_ (process.name_ () + '$process')\n" if $process;

  # Buffered write errors only surface when the handle is closed.
  close ($pset) or die "Could not write \"$psetFileName\": $!\n";
}
|
108 |
|
109 |
# Writes "runList.py" (a Python list of 'file:' URLs) into the working
# directory and returns the number of ROOT files that were listed.
#
# The first argument is interpreted, in this order, as
#   - a directory: every entry ending in ".root" is used;
#   - a plain file: a text file naming one input file per line, of which the
#     lines ending in ".root" are used;
#   - anything else: a dataset name that is resolved through getLocation.
#
# For a dataset found through getLocation the user is asked to confirm if the
# dataset's status is not "present" or if the number of ROOT files on disk
# differs from the count returned by getLocation. If getLocation supplies a
# non-negative, non-zero cross section it is written to
# "crossSectionInPicobarn.txt" in the working directory.
#
# Only files that end up in runList.py are counted. (Previously every
# directory entry was counted, and two was subtracted even for text lists,
# which contain no "." and ".." entries.)
#
# Returns 0 without writing anything if no dataset is given. Exits if the
# user declines a prompt or the dataset location does not exist; dies if a
# file or directory cannot be opened.
sub outputRunList
{
  my ($dataset, $workingDir) = @_;

  return 0 if !$dataset;

  my $isLocation = -d $dataset ? 1 : 0;
  my $isRunList = -f $dataset ? 1 : 0;

  # Shared yes/no prompt; anything but "y" or "Y" (including EOF) quits.
  my $confirmOrExit = sub
  {
    print "Continue anyway? (y/N): ";
    my $response = <STDIN>;
    $response = "" if !defined $response;
    $response =~ s/\n//g;
    exit if lc ($response) ne "y";
  };

  my ($location, $nFiles, $status, $crossSection);
  $location = $dataset if $isLocation;
  if (!$isLocation && !$isRunList)
  {
    ($location, $nFiles, $status, $crossSection) = getLocation ($dataset);
    if ($status ne "present")
    {
      print "This dataset is not marked as present on the Tier 3!\n";
      $confirmOrExit->();
    }
    if (!(-e $location))
    {
      print "The database does not know where this dataset is!\n";
      exit;
    }
  }

  # Collect the absolute paths of all ROOT files to run over.
  my @candidates;
  if ($isRunList)
  {
    open (my $runList, "<", $dataset)
      or die "Could not open \"$dataset\" for reading: $!\n";
    while (my $file = <$runList>)
    {
      # Strip the line ending and surrounding blanks so that they do not
      # become part of the file name.
      $file =~ s/^\s+|\s+$//g;
      push (@candidates, $file) if $file ne "";
    }
    close ($runList);
  }
  else
  {
    opendir (my $directory, $location)
      or die "Could not open directory \"$location\": $!\n";
    @candidates = map { "$location/$_" } readdir ($directory);
    closedir ($directory);
  }

  my @rootFiles;
  foreach my $candidate (@candidates)
  {
    next if $candidate !~ m/\.root$/;
    my $path = abs_path ($candidate);
    $path = $candidate if !defined $path;  # keep the name if it cannot be resolved
    push (@rootFiles, $path);
  }

  my $nRootFiles = @rootFiles;
  if (!$isLocation && !$isRunList && $nRootFiles != $nFiles)
  {
    print "Number of files does not match database entry!\n";
    $confirmOrExit->();
  }
  # From here on, report what is actually on disk and in runList.py.
  $nFiles = $nRootFiles;

  my $runListFileName = "$workingDir/runList.py";
  open (my $output, ">", $runListFileName)
    or die "Could not open \"$runListFileName\" for writing: $!\n";
  print $output "runList = [\n";
  print $output join (",\n", map { "'file:$_'" } @rootFiles);
  print $output "\n" if @rootFiles;
  print $output "]";
  close ($output) or die "Could not write \"$runListFileName\": $!\n";

  if ($crossSection && $crossSection >= 0.0)
  {
    my $crossSectionFileName = "$workingDir/crossSectionInPicobarn.txt";
    open (my $crossSectionFile, ">", $crossSectionFileName)
      or die "Could not open \"$crossSectionFileName\" for writing: $!\n";
    print $crossSectionFile "$crossSection\n";
    close ($crossSectionFile) or die "Could not write \"$crossSectionFileName\": $!\n";
  }

  return $nFiles;
}
|
189 |
|
190 |
# Looks up a dataset in the "ntuple" table through the package variable $db
# and returns the list (location, nFiles, status, crossSection).
#
# The name is matched as a substring, with "*" acting as a wildcard. The
# cross section returned is higherOrderCrossSectionInPicobarn when that is
# greater than zero and crossSectionInPicobarn otherwise; the same rule is
# applied whether one or several datasets match.
#
# If several datasets match, they are listed with their status, creator and
# creation date, and the user is asked to choose one by number.
#
# Exits if no dataset matches or the user's selection is invalid; dies if
# there is no database connection or the query fails.
sub getLocation
{
  my $dataset = shift;

  die "No connection to the ntuple database!\n" if !$db;

  # Translate the shell-style wildcard and match anywhere in the name.
  my $queryDataset = $dataset;
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";

  # Let the driver quote the pattern so that quotes or backslashes in the
  # user-supplied name cannot break or alter the statement.
  my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn,higherOrderCrossSectionInPicobarn from ntuple where dataset like "
            . $db->quote ($queryDataset)
            . " order by creationTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);
  die "The query for dataset \"$dataset\" failed!\n" if !$results;

  # Reduces a result row to [location, nFiles, status, crossSection].
  my $summarize = sub
  {
    my @row = @_;
    my $higherOrder = defined $row[7] ? $row[7] + 0 : 0;
    my $crossSection = $higherOrder > 0 ? $higherOrder : $row[6];
    return [$row[3], $row[4], $row[5], $crossSection];
  };

  my $nRows = $results->numrows ();
  if ($nRows == 0)
  {
    print "Dataset does not exist on the Tier 3!\n";
    exit;
  }
  if ($nRows == 1)
  {
    return @{$summarize->($results->fetchrow ())};
  }

  my %map;
  print "Found multiple datasets matching\n";
  print "\"$dataset\":\n";
  for (my $i = 1; $i <= $nRows; $i++)
  {
    my @row = $results->fetchrow ();
    $map{"$i"} = $summarize->(@row);

    # Show only the date part of the creation time.
    $row[2] = "" if !defined $row[2];
    $row[2] =~ s/([^ ]*) [^ ]*/$1/g;
    my $rowStatus = defined $row[5] ? $row[5] : "";

    # The name is passed as an argument so that a "%" in it is not taken as a
    # format directive.
    printf "(%2d) %s\n", $i, $row[0];
    print " (";
    print color "green" if $rowStatus eq "present";
    print color "bold yellow" if $rowStatus eq "submitted";
    print color "bold red" if $rowStatus eq "created" or $rowStatus eq "cancelled" or $rowStatus eq "deprecated";
    print $rowStatus;
    print color "reset";
    print ") created by $row[1] on $row[2]\n";
  }

  print "\nWhich dataset would you like to use?: ";
  my $response = <STDIN>;
  $response = "" if !defined $response;
  $response =~ s/[ \t\n]//g;
  if (!(exists $map{$response}))
  {
    print "Your selection \"$response\" was not a valid option! Quitting.\n";
    exit;
  }

  return @{$map{$response}};
}
|
243 |
|
244 |
# Writes the Condor submit file "condor.sub" into the working directory.
#
# If the template named by the first argument exists, its contents are used
# with the literal placeholders $cmsRun, $nJobs, $dataset and $label
# replaced; otherwise a built-in submit description is generated.
#
# Arguments (positional):
#   condorFileName  path of an optional template submit file
#   workingDir      directory in which condor.sub is created
#   nJobs           number of jobs to queue
#   dataset         dataset name passed to the jobs; "NULL" is used if false
#   label           dataset label passed to the jobs; "NULL" is used if false
#
# The executable is whatever "which cmsRun" reports. Dies if the template
# cannot be read or condor.sub cannot be written.
sub outputCondor
{
  my ($condorFileName, $workingDir, $nJobs, $dataset, $label) = @_;

  # Backticks keep the trailing newline, which must not end up in the middle
  # of the submit file.
  my $cmsRun = `which cmsRun`;
  $cmsRun = "" if !defined $cmsRun;
  chomp $cmsRun;
  warn "Warning: cmsRun was not found in the PATH!\n" if !$cmsRun;

  my $datasetArgument = $dataset ? $dataset : "NULL";
  my $labelArgument = $label ? $label : "NULL";

  my $condorFile = "";
  if (!(-e $condorFileName))
  {
    $condorFile .= "Executable = $cmsRun\n";
    $condorFile .= "Universe = vanilla\n";
    $condorFile .= "Getenv = True\n";
    $condorFile .= "Arguments = config_cfg.py True $nJobs \$(Process) $datasetArgument $labelArgument\n";
    $condorFile .= "\n";
    $condorFile .= "Output = condor_\$(Process).out\n";
    $condorFile .= "Error = condor_\$(Process).err\n";
    $condorFile .= "Log = condor_\$(Process).log\n";
    $condorFile .= "\n";
    $condorFile .= "+IsLocalJob = true\n";
    $condorFile .= "Rank = TARGET.IsLocalSlot\n";
    $condorFile .= "\n";
    $condorFile .= "Queue $nJobs\n";
  }
  else
  {
    open (my $template, "<", $condorFileName)
      or die "Could not open \"$condorFileName\" for reading: $!\n";
    $condorFile = join ("", <$template>);
    close ($template);

    # The patterns match the placeholder text itself, e.g. a literal "$nJobs".
    $condorFile =~ s/\$cmsRun/$cmsRun/g;
    $condorFile =~ s/\$nJobs/$nJobs/g;
    $condorFile =~ s/\$dataset/$datasetArgument/g;
    $condorFile =~ s/\$label/$labelArgument/g;
  }

  my $submitFileName = "$workingDir/condor.sub";
  open (my $submitFile, ">", $submitFileName)
    or die "Could not open \"$submitFileName\" for writing: $!\n";
  print $submitFile $condorFile;
  close ($submitFile) or die "Could not write \"$submitFileName\": $!\n";
}
|
297 |
|
298 |
# Prints the usage message to standard output and exits.
#
# The program name shown is the base name of $0. This subroutine never
# returns.
sub printHelp
{
  my $exeName = $0;
  $exeName =~ s/^.*\/([^\/]*)$/$1/;

  print "Usage: $exeName [OPTION]... [DATASET | LOCATION | LIST] CONFIG DIRECTORY NJOBS\n";
  print "Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor.\n";
  print "\n";
  printf "%-29s%s\n", " -d, --dataset DATASET", "override the dataset name";
  printf "%-29s%s\n", " -h, --help", "print this help message";
  printf "%-29s%s\n", " -l, --label LABEL", "give the dataset a short label";
  printf "%-29s%s\n", " -m, --maxEvents N", "only run over N events in the dataset; default is";
  # Continuation of the --maxEvents description; it has to follow that
  # option directly rather than the --process line.
  printf "%-29s%s\n", " ", "to run over all events";
  printf "%-29s%s\n", " -p, --process PROCESS", "suffix for the process name";
  print "\n";
  print "The optional first argument must be either a DATASET registered in the Tier 3\n";
  print "ntuple database, a LOCATION which exists on disk, or a text file containing a\n";
  print "LIST of files to run over. CONFIG must be a valid CMSSW python configuration\n";
  print "which can be used with cmsRun. DIRECTORY is a working directory that is created\n";
  print " and in which all output, both from the CMSSW jobs and from Condor, is placed.\n";
  print "Finally, NJOBS is the number of Condor jobs that will be created.\n";

  exit;
}
|