#!/usr/bin/env perl
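
# Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor: the DATASET
# is looked up in the Tier 3 ntuple database, its files are split across (at
# most) NJOBS Condor jobs, and the jobs are submitted from a newly created
# working DIRECTORY with condor_submit.
#
# Usage: <script> [OPTION]... DATASET CONFIG DIRECTORY NJOBS
#
# Illustrative invocation (the script, dataset, configuration, and directory
# names below are hypothetical placeholders):
#   ./condorSubmit.pl '*DYJetsToLL*' userConfig_cfg.py condorJobs 10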

use strict;
use Mysql;
use File::Copy;
use Getopt::Long;
use POSIX;

sub printHelp;
sub outputPset;
sub outputRunList;
sub getLocation;
sub outputCondor;

our $db = Mysql->connect ("cmshead", "ntuple", "osuT3User");

# Parse command-line options; only -h/--help is recognized.
my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "help|h");
my $argc = @ARGV;

printHelp () if $opt{"help"};
printHelp () if $argc != 4;
# The four positional arguments are DATASET, CONFIG, DIRECTORY, and NJOBS.
if (!(-e $ARGV[1]))
  {
    print "\"$ARGV[1]\" does not exist!\n";
    exit;
  }
if (-e $ARGV[2])
  {
    print "Directory \"$ARGV[2]\" already exists!\n";
    print "Please delete it or specify another working directory.\n";
    exit;
  }
mkdir $ARGV[2];
my $nFiles = outputRunList ($ARGV[0], $ARGV[2]);
my $nJobs = $ARGV[3];
# With ceil (nFiles / nJobs) files per job, this is the number of jobs actually
# needed; it is never larger than the requested NJOBS.
my $realNJobs = ceil ($nFiles / ceil ($nFiles / $nJobs));
outputPset ($ARGV[2]);
outputCondor ($ARGV[2], $realNJobs);
copy ($ARGV[1], "$ARGV[2]/userConfig_cfg.py");
chdir $ARGV[2];
print "Submitting $realNJobs jobs to run on $nFiles files.\n";
system ("condor_submit condor.sub");

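# outputPset writes config_cfg.py, a thin wrapper around the user's
# configuration (copied into the working directory as userConfig_cfg.py).
# The wrapper appends the Condor job number to the TFileService output file
# name, replaces the input files with the per-job run list provided by
# OSUT3Analysis.DBTools.osusub_cfg, and runs over all events in those files.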
sub
outputPset
{
  my $workingDir = shift;

  open (PSET, ">$workingDir/config_cfg.py");

  print PSET "import FWCore.ParameterSet.Config as cms\n";
  print PSET "import OSUT3Analysis.DBTools.osusub_cfg as osusub\n";
  print PSET "import re\n";
  print PSET "import userConfig_cfg as pset\n";
  print PSET "\n";
  print PSET "fileName = pset.process.TFileService.fileName\n";
  print PSET "fileName = fileName.pythonValue ()\n";
  print PSET "fileName = fileName[1:(len (fileName) - 1)]\n";
  print PSET "fileName = re.sub (r'^(.*)\\.([^\\.]*)\$', r'\\1_' + str (osusub.jobNumber) + r'.\\2', fileName)\n";
  print PSET "pset.process.TFileService.fileName = fileName\n";
  print PSET "\n";
  print PSET "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n";
  print PSET "pset.process.maxEvents.input = cms.untracked.int32 (-1)\n";
  print PSET "process = pset.process\n";

  close (PSET);
}

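# outputRunList writes runList.py in the working directory, containing a
# Python list of "file:" paths for every file of the dataset at its Tier 3
# location, and returns the number of files.  It aborts if the dataset is not
# marked present or if the file count on disk disagrees with the database.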
sub
outputRunList
{
  my $dataset = shift;
  my $workingDir = shift;

  my ($location, $nFiles, $status) = getLocation ($dataset);
  if ($status ne "present")
    {
      print "This dataset is not marked as present on the Tier 3!\n";
      exit;
    }
  if (!(-e $location))
    {
      print "The database does not know where this dataset is!\n";
      exit;
    }
  opendir (LOCATION, $location);
  my @files = readdir (LOCATION);
  closedir (LOCATION);
  # readdir returns "." and ".." in addition to the dataset's files.
  if (@files - 2 != $nFiles)
    {
      print "Number of files does not match database entry!\n";
      exit;
    }
  open (RUNLIST, ">$workingDir/runList.py");
  print RUNLIST "runList = [\n";
  for (my $i = 0; $i < @files; $i++)
    {
      next if $files[$i] eq ".";
      next if $files[$i] eq "..";
      print RUNLIST "'file:$location/$files[$i]'";
      print RUNLIST "," if $i + 1 != @files;
      print RUNLIST "\n";
    }
  print RUNLIST "]";
  close (RUNLIST);

  return $nFiles;
}

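# getLocation looks the dataset up in the ntuple database.  Shell-style "*"
# wildcards are translated to SQL "%" and the pattern is wrapped in "%...%"
# before querying for datasets marked present.  If exactly one match is found,
# its (location, nFiles, status) triplet is returned; if several match, the
# user is prompted to pick one.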
sub
getLocation
{
  my $dataset = shift;

  my $results;
  my $queryDataset = $dataset;
  $queryDataset =~ s/\*/%/g;
  $queryDataset =~ s/(.*)/%$1%/g;
  my $query = "select dataset,user,creationTime,location,nFiles,status from ntuple where dataset like '$queryDataset' and status='present' order by creationTime";
  $db->selectdb ("ntuple");
  $results = $db->query ($query);
  if ($results->numrows () == 1)
    {
      my @row = $results->fetchrow ();
      return ($row[3], $row[4], $row[5]);
    }
  if ($results->numrows () == 0)
    {
      print "Dataset does not exist on the Tier 3!\n";
      exit;
    }
  my %map;
  print "Found multiple datasets matching\n";
  print "\"$dataset\":\n";
  for (my $i = 1; $i <= $results->numrows (); $i++)
    {
      my @row = $results->fetchrow ();
      $map{"$i"} = [$row[3], $row[4], $row[5]];
      printf "(%2d) $row[0]\n", $i;
      print " created by $row[1] on $row[2]\n";
    }
  print "\nWhich dataset would you like to use?: ";
  my $response = <STDIN>;
  $response =~ s/[ \t\n]//g;
  if (!(exists $map{$response}))
    {
      print "Your selection \"$response\" was not a valid option! Quitting.\n";
      exit;
    }

  return ($map{$response}[0], $map{$response}[1], $map{$response}[2]);
}

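# outputCondor writes condor.sub in the working directory.  The submit file
# runs cmsRun (as found by `which`) in the vanilla universe, passing
# config_cfg.py, the total number of jobs, and the Condor process number as
# arguments, and queues $nJobs jobs.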
sub
outputCondor
{
  my $workingDir = shift;
  my $nJobs = shift;

  my $cmsRun = `which cmsRun`;
  chomp $cmsRun;  # strip the trailing newline from the shell output
  open (SUB, ">$workingDir/condor.sub");

  print SUB "Executable = $cmsRun\n";
  print SUB "Universe = vanilla\n";
  print SUB "Getenv = True\n";
  print SUB "Arguments = config_cfg.py $nJobs \$(Process)\n";
  print SUB "\n";
  print SUB "Output = condor_\$(Process).out\n";
  print SUB "Error = condor_\$(Process).err\n";
  print SUB "Log = condor_\$(Process).log\n";
  print SUB "\n";
  print SUB "Queue $nJobs\n";

  close (SUB);
}

sub
printHelp
{
  my $exeName = $0;
  $exeName =~ s/^.*\/([^\/]*)$/$1/;

  print "Usage: $exeName [OPTION]... DATASET CONFIG DIRECTORY NJOBS\n";
  print "Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor.\n";
  print "\n";
  printf "%-29s%s\n", " -h, --help", "print this help message";
  print "\n";
  print "The DATASET must exist in the Tier 3 ntuple database, and CONFIG must be a valid\n";
  print "CMSSW python configuration which can be used with cmsRun. DIRECTORY is a working\n";
  print "directory that is created and in which all output, both from the CMSSW jobs and\n";
  print "from Condor, is placed. Finally, NJOBS is the number of Condor jobs that will\n";
  print "be created.\n";

  exit;
}