5 |
|
use File::Copy; |
6 |
|
use Getopt::Long; |
7 |
|
use POSIX; |
8 |
+ |
use Term::ANSIColor; |
9 |
+ |
use Cwd 'abs_path'; |
10 |
+ |
|
11 |
+ |
select ((select (STDOUT), $| = 1)[0]); |
12 |
|
|
13 |
|
sub printHelp; |
14 |
|
sub outputPset; |
20 |
|
|
# ---------------------------------------------------------------------------
# Main program.
#
# Usage: [OPTION]... [DATASET | LOCATION | LIST] CONFIG DIRECTORY NJOBS
#
# Parses the command line, creates the working DIRECTORY, writes the run
# list, the cmsRun configuration wrapper and the Condor submit file into it,
# copies the user's CONFIG there and finally hands the jobs to condor_submit.
# ---------------------------------------------------------------------------
my %opt;
Getopt::Long::Configure ("bundling");
# GetOptions returns false when it meets an unknown or malformed option; show
# the usage text instead of carrying on with a half-parsed command line.
GetOptions (\%opt, "dataset|d=s", "label|l=s", "maxEvents|m=s", "process|p=s", "help|h") or printHelp ();
my $argc = @ARGV;

printHelp () if $opt{"help"};
printHelp () if $argc != 3 && $argc != 4;

# The leading DATASET argument is optional; without it the user's config is
# expected to provide its own source.
my $dataset = "";
my $config;
my $directory;
my $nJobs;
if ($argc == 3)
{
    ($config, $directory, $nJobs) = @ARGV;
}
else
{
    ($dataset, $config, $directory, $nJobs) = @ARGV;
}

# NJOBS is used as a divisor below, so it has to be a strictly positive
# integer; anything else would end in a division by zero.
if ($nJobs !~ m/^[1-9][0-9]*$/)
{
    print "NJOBS must be a positive integer, not \"$nJobs\"!\n";
    exit;
}
if (defined $opt{"maxEvents"} && $opt{"maxEvents"} !~ m/^-?[0-9]+$/)
{
    print "The argument of --maxEvents must be an integer, not \"$opt{'maxEvents'}\"!\n";
    exit;
}
if (!(-e $config))
{
    print "\"$config\" does not exist!\n";
    exit;
}
if (-e $directory)
{
    print "Directory \"$directory\" already exists!\n";
    print "Please delete it or specify another working directory.\n";
    exit;
}
mkdir ($directory) or die "Cannot create directory \"$directory\": $!\n";

# outputRunList returns the number of input files (0 when no dataset was
# given).  Rebalance the job count so that no job is left without files.
my $nFiles = outputRunList ($dataset, $directory);
my $realNJobs = $nJobs;
$realNJobs = ceil ($nFiles / ceil ($nFiles / $realNJobs)) if $nFiles;

# -1 events per job means "run over everything".
my $eventsPerJob = -1;
$eventsPerJob = ceil ($opt{"maxEvents"} / $realNJobs) if $opt{"maxEvents"} && $opt{"maxEvents"} >= 0;
my $realMaxEvents = $eventsPerJob * $realNJobs;

# Hand outputPset an empty string rather than undef when no process suffix
# was requested, since it applies a substitution to this argument.
my $process = defined $opt{"process"} ? $opt{"process"} : "";
outputPset ($directory, $dataset, $opt{"maxEvents"}, $eventsPerJob, $process);

# From here on the dataset is only used as a name, which -d may override.
$dataset = $opt{"dataset"} if $opt{"dataset"};
outputCondor ("$ENV{'CMSSW_BASE'}/src/OSUT3Analysis/DBTools/data/condor.sub", $directory, $realNJobs, $dataset, $opt{"label"});
copy ($config, "$directory/userConfig_cfg.py") or die "Cannot copy \"$config\" to \"$directory/userConfig_cfg.py\": $!\n";

# condor_submit is run with a relative file name, so failing to enter the
# working directory must stop the script before anything is submitted.
chdir ($directory) or die "Cannot change to directory \"$directory\": $!\n";
print "Submitting $realNJobs jobs to run on $realMaxEvents events in $nFiles files.\n" if $realMaxEvents >= 0;
print "Submitting $realNJobs jobs to run on all events in $nFiles files.\n" if $realMaxEvents < 0;
my $submitStatus = system ("LD_LIBRARY_PATH=/usr/lib64/condor:\$LD_LIBRARY_PATH condor_submit condor.sub");
print "condor_submit did not complete successfully!\n" if $submitStatus != 0;
72 |
|
|
73 |
|
sub |
74 |
|
outputPset |
75 |
|
{ |
76 |
|
my $workingDir = shift; |
77 |
+ |
my $dataset = shift; |
78 |
+ |
my $nEvents = shift; |
79 |
+ |
my $eventsPerJob = shift; |
80 |
+ |
my $process = shift; |
81 |
+ |
|
82 |
+ |
$process =~ s/[^[:alnum:]]//g; |
83 |
|
|
84 |
|
open (PSET, ">$workingDir/config_cfg.py"); |
85 |
|
|
94 |
|
print PSET "fileName = re.sub (r'^(.*)\\.([^\\.]*)\$', r'\\1_' + str (osusub.jobNumber) + r'.\\2', fileName)\n"; |
95 |
|
print PSET "pset.process.TFileService.fileName = fileName\n"; |
96 |
|
print PSET "\n"; |
97 |
< |
print PSET "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n"; |
98 |
< |
print PSET "pset.process.maxEvents.input = cms.untracked.int32 (-1)\n"; |
97 |
> |
if ($dataset) |
98 |
> |
{ |
99 |
> |
print PSET "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n"; |
100 |
> |
print PSET "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)\n" if $eventsPerJob < 0; |
101 |
> |
} |
102 |
> |
print PSET "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)\n" if $eventsPerJob >= 0; |
103 |
|
print PSET "process = pset.process\n"; |
104 |
+ |
print PSET "process.setName_ (process.name_ () + '$process')\n" if $process; |
105 |
|
|
106 |
|
close (PSET); |
107 |
|
} |
112 |
|
my $dataset = shift; |
113 |
|
my $workingDir = shift; |
114 |
|
|
115 |
< |
my ($location, $nFiles, $status, $crossSection) = getLocation ($dataset); |
116 |
< |
if ($status ne "present") |
115 |
> |
return 0 if !$dataset; |
116 |
> |
my $location; |
117 |
> |
my $nFiles; |
118 |
> |
my $status; |
119 |
> |
my $crossSection; |
120 |
> |
my $isLocation = 0; |
121 |
> |
my $isRunList = 0; |
122 |
> |
$isLocation = 1 if -d $dataset; |
123 |
> |
$isRunList = 1 if -f $dataset; |
124 |
> |
$location = $dataset if $isLocation; |
125 |
> |
if (!$isLocation && !$isRunList) |
126 |
|
{ |
127 |
< |
print "This dataset is not marked as present on the Tier 3!\n"; |
128 |
< |
exit; |
127 |
> |
($location, $nFiles, $status, $crossSection) = getLocation ($dataset); |
128 |
> |
if ($status ne "present") |
129 |
> |
{ |
130 |
> |
print "This dataset is not marked as present on the Tier 3!\n"; |
131 |
> |
print "Continue anyway? (y/N): "; |
132 |
> |
my $response = <STDIN>; |
133 |
> |
$response =~ s/\n//g; |
134 |
> |
exit if !$response || lc ($response) ne "y"; |
135 |
> |
} |
136 |
> |
if (!(-e $location)) |
137 |
> |
{ |
138 |
> |
print "The database does not know where this dataset is!\n"; |
139 |
> |
exit; |
140 |
> |
} |
141 |
|
} |
142 |
< |
if (!(-e $location)) |
142 |
> |
my @files; |
143 |
> |
if (!$isRunList) |
144 |
|
{ |
145 |
< |
print "The database does not know where this dataset is!\n"; |
146 |
< |
exit; |
145 |
> |
opendir (LOCATION, $location); |
146 |
> |
@files = readdir (LOCATION); |
147 |
> |
closedir (LOCATION); |
148 |
|
} |
149 |
< |
opendir (LOCATION, $location); |
150 |
< |
my @files = readdir (LOCATION); |
151 |
< |
closedir (LOCATION); |
149 |
> |
else |
150 |
> |
{ |
151 |
> |
open (RUNLIST, $dataset); |
152 |
> |
while (my $file = <RUNLIST>) |
153 |
> |
{ |
154 |
> |
push (@files, abs_path ($file)); |
155 |
> |
} |
156 |
> |
close (RUNLIST); |
157 |
> |
} |
158 |
> |
$nFiles = @files - 2 if $isLocation || $isRunList; |
159 |
|
if (@files - 2 != $nFiles) |
160 |
|
{ |
161 |
|
print "Number of files does not match database entry!\n"; |
162 |
< |
exit; |
162 |
> |
print "Continue anyway? (y/N): "; |
163 |
> |
my $response = <STDIN>; |
164 |
> |
$response =~ s/\n//g; |
165 |
> |
exit if !$response || lc ($response) ne "y"; |
166 |
|
} |
167 |
|
open (RUNLIST, ">$workingDir/runList.py"); |
168 |
|
print RUNLIST "runList = [\n"; |
170 |
|
{ |
171 |
|
next if $files[$i] eq "."; |
172 |
|
next if $files[$i] eq ".."; |
173 |
< |
print RUNLIST "'file:$location/$files[$i]'"; |
173 |
> |
next if !($files[$i] =~ m/\.root$/); |
174 |
> |
print RUNLIST "'file:" . abs_path ("$location/$files[$i]") . "'"; |
175 |
|
print RUNLIST "," if $i + 1 != @files; |
176 |
|
print RUNLIST "\n"; |
177 |
|
} |
196 |
|
my $queryDataset = $dataset; |
197 |
|
$queryDataset =~ s/\*/%/g; |
198 |
|
$queryDataset =~ s/(.*)/%$1%/g; |
199 |
< |
my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn from ntuple where dataset like '$queryDataset' and status='present' order by creationTime"; |
199 |
> |
my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn,higherOrderCrossSectionInPicobarn from ntuple where dataset like '$queryDataset' order by creationTime"; |
200 |
|
$db->selectdb ("ntuple"); |
201 |
|
$results = $db->query ($query); |
202 |
|
if ($results->numrows () == 1) |
203 |
|
{ |
204 |
|
my @row = $results->fetchrow (); |
205 |
< |
return ($row[3], $row[4], $row[5], $row[6]); |
205 |
> |
$row[7] += 0; |
206 |
> |
return ($row[3], $row[4], $row[5], $row[7]) if $row[7] > 0; |
207 |
> |
return ($row[3], $row[4], $row[5], $row[6]) if $row[7] <= 0; |
208 |
|
} |
209 |
|
if ($results->numrows () == 0) |
210 |
|
{ |
217 |
|
for (my $i = 1; $i <= $results->numrows (); $i++) |
218 |
|
{ |
219 |
|
my @row = $results->fetchrow (); |
220 |
< |
$map{"$i"} = [$row[3], $row[4], $row[5], $row[6]]; |
220 |
> |
$row[2] =~ s/([^ ]*) [^ ]*/$1/g; |
221 |
> |
$map{"$i"} = [$row[3], $row[4], $row[5], $row[7]] if $row[7]; |
222 |
> |
$map{"$i"} = [$row[3], $row[4], $row[5], $row[6]] if !$row[7]; |
223 |
|
printf "(%2d) $row[0]\n", $i; |
224 |
< |
print " created by $row[1] on $row[2]\n"; |
224 |
> |
print " ("; |
225 |
> |
print color "green" if $row[5] eq "present"; |
226 |
> |
print color "bold yellow" if $row[5] eq "submitted"; |
227 |
> |
print color "bold red" if $row[5] eq "created" or $row[5] eq "cancelled" or $row[5] eq "deprecated"; |
228 |
> |
print $row[5]; |
229 |
> |
print color "reset"; |
230 |
> |
print ") created by $row[1] on $row[2]\n"; |
231 |
|
} |
232 |
|
print "\nWhich dataset would you like to use?: "; |
233 |
|
my $response = <STDIN>; |
# Writes the Condor submit description to "$workingDir/condor.sub".
#
# Arguments (positional):
#   $condorFileName  path of a template submit file; if it exists, its
#                    contents are used after replacing the placeholders
#                    $cmsRun, $nJobs, $dataset and $label.  If it does not
#                    exist, a built-in submit description is generated.
#   $workingDir      directory in which condor.sub is created
#   $nJobs           number of jobs to queue
#   $dataset         dataset name passed to the job ("NULL" if empty/undef)
#   $label           dataset label passed to the job ("NULL" if empty/undef)
#
# Dies if the template cannot be read or condor.sub cannot be written.
sub
outputCondor
{
    my $condorFileName = shift;
    my $workingDir = shift;
    my $nJobs = shift;
    my $dataset = shift;
    my $label = shift;

    # Backticks keep the trailing newline of the command output; strip it so
    # that the path can be embedded in the middle of a line.
    my $cmsRun = `which cmsRun`;
    $cmsRun = "" if !defined $cmsRun;
    chomp ($cmsRun);
    warn "Warning: cmsRun was not found in the PATH.\n" if $cmsRun eq "";

    # Only a missing or empty value becomes NULL; a literal "0" is kept.
    $dataset = "NULL" if !defined $dataset || $dataset eq "";
    $label = "NULL" if !defined $label || $label eq "";

    my $condorFile = "";

    if (!(-e $condorFileName))
    {
        $condorFile .= "Executable = $cmsRun\n";
        $condorFile .= "Universe = vanilla\n";
        $condorFile .= "Getenv = True\n";
        $condorFile .= "Arguments = config_cfg.py True $nJobs \$(Process) $dataset $label\n";
        $condorFile .= "\n";
        $condorFile .= "Output = condor_\$(Process).out\n";
        $condorFile .= "Error = condor_\$(Process).err\n";
        $condorFile .= "Log = condor_\$(Process).log\n";
        $condorFile .= "\n";
        $condorFile .= "+IsLocalJob = true\n";
        $condorFile .= "Rank = TARGET.IsLocalSlot\n";
        $condorFile .= "\n";
        $condorFile .= "Queue $nJobs\n";
    }
    else
    {
        open (my $template, "<", $condorFileName) or die "Cannot read \"$condorFileName\": $!\n";
        $condorFile = join ("", <$template>);
        close ($template);

        # Replace all placeholders in a single pass so that text inserted for
        # one placeholder is never rescanned for another.
        my %value = (
            "cmsRun"  => $cmsRun,
            "nJobs"   => $nJobs,
            "dataset" => $dataset,
            "label"   => $label,
        );
        $condorFile =~ s/\$(cmsRun|nJobs|dataset|label)/$value{$1}/g;
    }

    open (my $sub, ">", "$workingDir/condor.sub") or die "Cannot write \"$workingDir/condor.sub\": $!\n";
    print $sub $condorFile;
    close ($sub) or die "Cannot write \"$workingDir/condor.sub\": $!\n";
}
297 |
|
|
301 |
|
my $exeName = $0; |
302 |
|
$exeName =~ s/^.*\/([^\/]*)$/$1/; |
303 |
|
|
304 |
< |
print "Usage: $exeName [OPTION]... DATASET CONFIG DIRECTORY NJOBS\n"; |
304 |
> |
print "Usage: $exeName [OPTION]... [DATASET | LOCATION | LIST] CONFIG DIRECTORY NJOBS\n"; |
305 |
|
print "Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor.\n"; |
306 |
|
print "\n"; |
307 |
+ |
printf "%-29s%s\n", " -d, --dataset DATASET", "override the dataset name"; |
308 |
|
printf "%-29s%s\n", " -h, --help", "print this help message"; |
309 |
+ |
printf "%-29s%s\n", " -l, --label LABEL", "give the dataset a short label"; |
310 |
+ |
printf "%-29s%s\n", " -m, --maxEvents N", "only run over N events in the dataset; default is"; |
311 |
+ |
printf "%-29s%s\n", " -p, --process PROCESS", "suffix for the process name"; |
312 |
+ |
printf "%-29s%s\n", " ", "to run over all events"; |
313 |
|
print "\n"; |
314 |
< |
print "The DATASET must exist in the Tier 3 ntuple database, and CONFIG must be a valid\n"; |
315 |
< |
print "CMSSW python configuration which can be used with cmsRun. DIRECTORY is a working\n"; |
316 |
< |
print "directory that is created and in which all output, both from the CMSSW jobs and\n"; |
317 |
< |
print "from Condor, is placed. Finally, NJOBS is the number of Condor jobs that will\n"; |
318 |
< |
print "be created.\n"; |
314 |
> |
print "The optional first argument must be either a DATASET registered in the Tier 3\n"; |
315 |
> |
print "ntuple database, a LOCATION which exists on disk, or a text file containing a\n"; |
316 |
> |
print "LIST of files to run over. CONFIG must be a valid CMSSW python configuration\n"; |
317 |
> |
print "which can be used with cmsRun. DIRECTORY is a working directory that is created\n"; |
318 |
> |
print " and in which all output, both from the CMSSW jobs and from Condor, is placed.\n"; |
319 |
> |
print "Finally, NJOBS is the number of Condor jobs that will be created.\n"; |
320 |
|
|
321 |
|
exit; |
322 |
|
} |