ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osusub
Revision: 1.18
Committed: Thu May 23 20:39:49 2013 UTC (11 years, 11 months ago) by ahart
Branch: MAIN
CVS Tags: V02-02-00, V02-01-01
Changes since 1.17: +9 -1 lines
Log Message:
When choosing among multiple datasets, the status of each dataset is now displayed in living color so hopefully it's easier to find the one you want.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Mysql;
5 use File::Copy;
6 use Getopt::Long;
7 use POSIX;
8 use Term::ANSIColor;
9
# Forward declarations: the subroutines are defined at the bottom of the file
# but are called from the main flow below.
sub printHelp;
sub outputPset;
sub outputRunList;
sub getLocation;
sub outputCondor;

# Handle to the Tier 3 ntuple database, shared with getLocation. It is only
# connected further down, and only when a dataset name has to be looked up.
our $db;

my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "label|l=s", "maxEvents|m=s", "help|h");
my $argc = @ARGV;

printHelp () if $opt{"help"};
printHelp () if $argc != 3 && $argc != 4;

# The dataset (or location) argument is optional and, when given, comes first.
my $dataset = "";
my $config;
my $directory;
my $nJobs;
if ($argc == 3)
  {
    ($config, $directory, $nJobs) = @ARGV;
  }
else
  {
    ($dataset, $config, $directory, $nJobs) = @ARGV;
  }

if (!(-e $config))
  {
    print "\"$config\" does not exist!\n";
    exit;
  }
if (-e $directory)
  {
    print "Directory \"$directory\" already exists!\n";
    print "Please delete it or specify another working directory.\n";
    exit;
  }
# NJOBS is used as a divisor below, so zero or a non-number must be rejected
# before anything is created on disk.
if ($nJobs !~ /^\d+$/ || $nJobs <= 0)
  {
    print "NJOBS must be a positive integer, not \"$nJobs\"!\n";
    exit;
  }

# The database is only consulted when the first argument is a dataset name
# rather than an existing path (the same condition outputRunList uses before
# calling getLocation), so only connect in that case.
if ($dataset && !(-e $dataset))
  {
    $db = Mysql->connect ("cmshead", "ntuple", "osuT3User")
      or die "Could not connect to the ntuple database on cmshead!\n";
  }

mkdir $directory
  or die "Could not create directory \"$directory\": $!\n";
my $nFiles = outputRunList ($dataset, $directory);

# Never run more jobs than are needed to give every job the same (rounded up)
# number of input files.
my $realNJobs = $nJobs;
$realNJobs = ceil ($nFiles / ceil ($nFiles / $realNJobs)) if $nFiles;

# A negative value means "all events".
my $eventsPerJob = -1;
$eventsPerJob = ceil ($opt{"maxEvents"} / $realNJobs) if $opt{"maxEvents"} && $opt{"maxEvents"} >= 0;
my $realMaxEvents = $eventsPerJob * $realNJobs;

outputPset ($directory, $dataset, $opt{"maxEvents"}, $eventsPerJob);
outputCondor ("$ENV{'CMSSW_BASE'}/src/OSUT3Analysis/DBTools/data/condor.sub", $directory, $realNJobs, $dataset, $opt{"label"});
copy ($config, "$directory/userConfig_cfg.py")
  or die "Could not copy \"$config\" to \"$directory/userConfig_cfg.py\": $!\n";
chdir $directory
  or die "Could not change to directory \"$directory\": $!\n";
print "Submitting $realNJobs jobs to run on $realMaxEvents events in $nFiles files.\n" if $realMaxEvents >= 0;
print "Submitting $realNJobs jobs to run on all events in $nFiles files.\n" if $realMaxEvents < 0;
system ("condor_submit condor.sub") == 0
  or die "condor_submit failed!\n";
68
# Writes $workingDir/config_cfg.py, a small Python wrapper that imports the
# user's configuration (userConfig_cfg) and adjusts it for one job:
#   - the TFileService output file name gets "_<osusub.jobNumber>" inserted
#     before its extension;
#   - if a dataset was given, the source file names are replaced by
#     osusub.runList;
#   - maxEvents is set to $eventsPerJob, except when no dataset was given and
#     $eventsPerJob is negative, in which case the user's value is left alone.
#
# Arguments: working directory, dataset (may be empty), total number of events
# requested (accepted for interface compatibility but not used), and number
# of events per job.
# Dies if the file cannot be written.
sub
outputPset
{
  my $workingDir = shift;
  my $dataset = shift;
  my $nEvents = shift;
  my $eventsPerJob = shift;

  my $psetFileName = "$workingDir/config_cfg.py";
  open (my $pset, ">", $psetFileName)
    or die "Could not open \"$psetFileName\" for writing: $!\n";

  print $pset "import FWCore.ParameterSet.Config as cms\n";
  print $pset "import OSUT3Analysis.DBTools.osusub_cfg as osusub\n";
  print $pset "import re\n";
  print $pset "import userConfig_cfg as pset\n";
  print $pset "\n";
  print $pset "fileName = pset.process.TFileService.fileName\n";
  print $pset "fileName = fileName.pythonValue ()\n";
  # pythonValue () returns the name wrapped in quotes; strip them.
  print $pset "fileName = fileName[1:(len (fileName) - 1)]\n";
  print $pset "fileName = re.sub (r'^(.*)\\.([^\\.]*)\$', r'\\1_' + str (osusub.jobNumber) + r'.\\2', fileName)\n";
  print $pset "pset.process.TFileService.fileName = fileName\n";
  print $pset "\n";
  print $pset "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n" if $dataset;
  print $pset "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)\n" if $dataset || $eventsPerJob >= 0;
  print $pset "process = pset.process\n";

  # Buffered write errors only show up when the handle is closed.
  close ($pset)
    or die "Could not write \"$psetFileName\": $!\n";
}
100
# Writes $workingDir/runList.py, a Python file defining "runList" as the list
# of input files ('file:<location>/<name>') for the given dataset, and returns
# the number of files.
#
# $dataset may be:
#   - empty: nothing is written and 0 is returned;
#   - a path that exists on disk: its directory entries are used directly;
#   - anything else: it is looked up with getLocation, which supplies the
#     location, the expected number of files, the status and the cross section.
#
# In the database case the user is asked for confirmation (and the program
# exits unless the answer is "y") when the dataset is not marked "present" or
# when the number of files on disk differs from the database entry; in the
# latter case the database value is the one returned. If a cross section is
# known it is written to $workingDir/crossSectionInPicobarn.txt.
#
# Dies if the location cannot be read as a directory or an output file cannot
# be written.
sub
outputRunList
{
  my $dataset = shift;
  my $workingDir = shift;

  return 0 if !$dataset;

  # Asks whether to go on and exits unless the answer is "y" or "Y". A closed
  # STDIN counts as "no".
  my $confirmOrExit = sub {
    print "Continue anyway? (y/N): ";
    my $response = <STDIN>;
    $response = "" if !defined $response;
    $response =~ s/\n//g;
    exit if lc ($response) ne "y";
  };

  my $location;
  my $nFiles;
  my $status;
  my $crossSection;
  my $isLocation = (-e $dataset) ? 1 : 0;
  if ($isLocation)
    {
      $location = $dataset;
    }
  else
    {
      ($location, $nFiles, $status, $crossSection) = getLocation ($dataset);
      if (!defined $status || $status ne "present")
        {
          print "This dataset is not marked as present on the Tier 3!\n";
          $confirmOrExit->();
        }
      if (!defined $location || !(-e $location))
        {
          print "The database does not know where this dataset is!\n";
          exit;
        }
    }

  # Without this check, a location that is not a readable directory would
  # silently produce an empty run list and a file count of -2.
  opendir (my $dir, $location)
    or die "Could not read directory \"$location\": $!\n";
  my @files = grep { $_ ne "." && $_ ne ".." } readdir ($dir);
  closedir ($dir);

  if ($isLocation)
    {
      $nFiles = @files;
    }
  elsif (@files != $nFiles)
    {
      print "Number of files does not match database entry!\n";
      $confirmOrExit->();
    }

  my $runListFileName = "$workingDir/runList.py";
  open (my $runList, ">", $runListFileName)
    or die "Could not open \"$runListFileName\" for writing: $!\n";
  print $runList "runList = [\n";
  print $runList join (",\n", map { "'file:$location/$_'" } @files);
  print $runList "\n" if @files;
  print $runList "]";
  close ($runList)
    or die "Could not write \"$runListFileName\": $!\n";

  if ($crossSection && $crossSection >= 0.0)
    {
      my $crossSectionFileName = "$workingDir/crossSectionInPicobarn.txt";
      open (my $crossSectionFile, ">", $crossSectionFileName)
        or die "Could not open \"$crossSectionFileName\" for writing: $!\n";
      print $crossSectionFile "$crossSection\n";
      close ($crossSectionFile)
        or die "Could not write \"$crossSectionFileName\": $!\n";
    }

  return $nFiles;
}
171
# Looks up $dataset in the "ntuple" table of the ntuple database through the
# global handle $db and returns the list
#   (location, nFiles, status, crossSection)
# for the matching entry, where crossSection is
# higherOrderCrossSectionInPicobarn when that is set and
# crossSectionInPicobarn otherwise.
#
# The name is matched as a substring, with "*" acting as a wildcard. If there
# is no match the program exits; if there are several, they are listed (with
# the status shown in color) and the user is asked to pick one, the program
# exiting on an invalid choice.
#
# Dies if there is no database connection or the query fails.
sub
getLocation
{
  my $dataset = shift;

  die "No connection to the ntuple database!\n" if !$db;

  # Build the LIKE pattern. Backslashes and single quotes are escaped first so
  # that the name cannot terminate the SQL string literal it is embedded in.
  my $queryDataset = $dataset;
  $queryDataset =~ s/([\\'])/\\$1/g;
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";
  my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn,higherOrderCrossSectionInPicobarn from ntuple where dataset like '$queryDataset' order by creationTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);
  die "Query of the ntuple database failed!\n" if !$results;

  # Column order follows the select above: 3 = location, 4 = nFiles,
  # 5 = status, 6 = crossSectionInPicobarn,
  # 7 = higherOrderCrossSectionInPicobarn.
  my $entryFor = sub {
    my @row = @_;
    return ($row[3], $row[4], $row[5], $row[7] ? $row[7] : $row[6]);
  };

  my $nRows = $results->numrows ();
  if ($nRows == 0)
    {
      print "Dataset does not exist on the Tier 3!\n";
      exit;
    }
  return $entryFor->($results->fetchrow ()) if $nRows == 1;

  my %statusColor = (
    "present"    => "green",
    "submitted"  => "bold yellow",
    "created"    => "bold red",
    "cancelled"  => "bold red",
    "deprecated" => "bold red",
  );

  my %map;
  print "Found multiple datasets matching\n";
  print "\"$dataset\":\n";
  for (my $i = 1; $i <= $nRows; $i++)
    {
      my @row = $results->fetchrow ();
      # Keep only the first space-separated part of the creation time.
      $row[2] =~ s/([^ ]*) [^ ]*/$1/g;
      $map{"$i"} = [$entryFor->(@row)];
      # The dataset name is passed as an argument, not as part of the format,
      # so that a "%" in it is printed literally.
      printf "(%2d) %s\n", $i, $row[0];
      print " (";
      print color ($statusColor{$row[5]}) if defined $row[5] && exists $statusColor{$row[5]};
      print $row[5];
      print color ("reset");
      print ") created by $row[1] on $row[2]\n";
    }
  print "\nWhich dataset would you like to use?: ";
  my $response = <STDIN>;
  $response = "" if !defined $response;
  $response =~ s/[ \t\n]//g;
  if (!(exists $map{$response}))
    {
      print "Your selection \"$response\" was not a valid option! Quitting.\n";
      exit;
    }

  return @{$map{$response}};
}
224
# Writes the Condor submit description file $workingDir/condor.sub.
#
# If the template $condorFileName exists, it is copied with the placeholders
# $cmsRun, $nJobs, $dataset and $label replaced by the path of cmsRun (as
# reported by "which cmsRun"), the number of jobs, the dataset and the label;
# an empty dataset or label is replaced by NULL. Otherwise a built-in default
# description is written.
#
# Arguments: template file name, working directory, number of jobs, dataset
# (may be empty) and label (may be empty or undefined).
# Dies if the template cannot be read or the output cannot be written.
sub
outputCondor
{
  my $condorFileName = shift;
  my $workingDir = shift;
  my $nJobs = shift;
  my $dataset = shift;
  my $label = shift;

  # Backticks keep the trailing newline of the command output; remove it so it
  # does not end up in the middle of the submit file.
  my $cmsRun = `which cmsRun`;
  $cmsRun = "" if !defined $cmsRun;
  chomp $cmsRun;
  warn "Warning: cmsRun was not found in the PATH!\n" if !$cmsRun;

  my $datasetArg = $dataset ? $dataset : "NULL";
  my $labelArg = $label ? $label : "NULL";
  my $condorFile = "";

  if (!(-e $condorFileName))
    {
      $condorFile .= "Executable = $cmsRun\n";
      $condorFile .= "Universe = vanilla\n";
      $condorFile .= "Getenv = True\n";
      $condorFile .= "Arguments = config_cfg.py True $nJobs \$(Process) $datasetArg $labelArg\n";
      $condorFile .= "\n";
      $condorFile .= "Output = condor_\$(Process).out\n";
      $condorFile .= "Error = condor_\$(Process).err\n";
      $condorFile .= "Log = condor_\$(Process).log\n";
      $condorFile .= "\n";
      $condorFile .= "+IsLocalJob = true\n";
      $condorFile .= "Rank = TARGET.IsLocalSlot\n";
      $condorFile .= "\n";
      $condorFile .= "Queue $nJobs\n";
    }
  else
    {
      open (my $template, "<", $condorFileName)
        or die "Could not open \"$condorFileName\" for reading: $!\n";
      $condorFile = join ("", <$template>);
      close ($template);
      $condorFile =~ s/\$cmsRun/$cmsRun/g;
      $condorFile =~ s/\$nJobs/$nJobs/g;
      $condorFile =~ s/\$dataset/$datasetArg/g;
      $condorFile =~ s/\$label/$labelArg/g;
    }

  my $subFileName = "$workingDir/condor.sub";
  open (my $sub, ">", $subFileName)
    or die "Could not open \"$subFileName\" for writing: $!\n";
  print $sub $condorFile;
  close ($sub)
    or die "Could not write \"$subFileName\": $!\n";
}
278
# Prints the usage message, with the program name taken from $0 without its
# directory part, and then exits.
sub
printHelp
{
  (my $exeName = $0) =~ s{^.*/([^/]*)$}{$1};

  # Option column and its description, one pair per output line.
  my @optionLines = (
    [" -h, --help", "print this help message"],
    [" -l, --label LABEL", "give the dataset a short label"],
    [" -m, --maxEvents N", "only run over N events in the dataset; default is"],
    [" ", "to run over all events"],
  );

  print "Usage: $exeName [OPTION]... [DATASET | LOCATION] CONFIG DIRECTORY NJOBS\n",
        "Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor.\n",
        "\n";
  foreach my $line (@optionLines)
    {
      printf "%-29s%s\n", $line->[0], $line->[1];
    }
  print "\n",
        "The optional first argument must be either a DATASET registered in the Tier 3\n",
        "ntuple database or a LOCATION which exists on disk. CONFIG must be a valid\n",
        "CMSSW python configuration which can be used with cmsRun. DIRECTORY is a\n",
        "working directory that is created and in which all output, both from the CMSSW\n",
        "jobs and from Condor, is placed. Finally, NJOBS is the number of Condor jobs\n",
        "that will be created.\n";

  exit;
}