ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osusub
Revision: 1.22
Committed: Tue Jul 9 17:23:14 2013 UTC (11 years, 9 months ago) by ahart
Branch: MAIN
CVS Tags: V02-03-02, HEAD
Changes since 1.21: +1 -0 lines
Log Message:
Skip non-ROOT files when creating the run list.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Mysql;
5 use File::Copy;
6 use Getopt::Long;
7 use POSIX;
8 use Term::ANSIColor;
9 use Cwd 'abs_path';
10
# Main program: parse the command line, create the working directory, write
# the run list, the per-job CMSSW configuration and the Condor submit file
# into it, and finally hand the jobs to condor_submit.
#
# Every error path exits with a non-zero status so that calling scripts can
# detect the failure.

# Make STDOUT unbuffered so that prompts and progress messages show up
# immediately.
select ((select (STDOUT), $| = 1)[0]);

# Forward declarations for the subroutines defined below the main program.
sub printHelp;
sub outputPset;
sub outputRunList;
sub getLocation;
sub outputCondor;

# Handle to the Tier 3 ntuple database, used by getLocation. The connection
# is only opened once the command line has been validated, so that --help and
# usage errors do not require a reachable database.
our $db;

my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "dataset|d=s", "label|l=s", "maxEvents|m=s", "process|p=s", "help|h");
my $argc = @ARGV;

printHelp () if $opt{"help"};
printHelp () if $argc != 3 && $argc != 4;

# The leading DATASET | LOCATION | LIST argument is optional; without it the
# input files are whatever the user's configuration already specifies.
my @positional = @ARGV;
my $dataset = ($argc == 4) ? shift (@positional) : "";
my ($config, $directory, $nJobs) = @positional;

# NJOBS is used as a divisor and ends up in the "Queue" statement of the
# submit file, so it has to be a strictly positive integer.
if ($nJobs !~ m/^[0-9]+$/ || $nJobs < 1)
  {
    print "NJOBS must be a positive integer, not \"$nJobs\"!\n";
    exit 1;
  }
if (!(-e $config))
  {
    print "\"$config\" does not exist!\n";
    exit 1;
  }
if (-e $directory)
  {
    print "Directory \"$directory\" already exists!\n";
    print "Please delete it or specify another working directory.\n";
    exit 1;
  }

$db = Mysql->connect ("cmshead", "ntuple", "osuT3User");

if (!mkdir ($directory))
  {
    print "Could not create directory \"$directory\": $!\n";
    exit 1;
  }

my $nFiles = outputRunList ($dataset, $directory);

# Do not create more jobs than are needed to give every job the same number
# of input files (rounded up).
my $realNJobs = $nJobs;
$realNJobs = ceil ($nFiles / ceil ($nFiles / $realNJobs)) if $nFiles;

# An events-per-job value of -1 means "run over all events".
my $eventsPerJob = -1;
$eventsPerJob = ceil ($opt{"maxEvents"} / $realNJobs) if $opt{"maxEvents"} && $opt{"maxEvents"} >= 0;
my $realMaxEvents = $eventsPerJob * $realNJobs;

outputPset ($directory, $dataset, $opt{"maxEvents"}, $eventsPerJob, $opt{"process"});

# The --dataset option only overrides the name that is passed on to the jobs;
# the run list above is always built from the positional argument.
$dataset = $opt{"dataset"} if $opt{"dataset"};
my $cmsswBase = defined ($ENV{'CMSSW_BASE'}) ? $ENV{'CMSSW_BASE'} : "";
outputCondor ("$cmsswBase/src/OSUT3Analysis/DBTools/data/condor.sub", $directory, $realNJobs, $dataset, $opt{"label"});

if (!copy ($config, "$directory/userConfig_cfg.py"))
  {
    print "Could not copy \"$config\" to \"$directory/userConfig_cfg.py\": $!\n";
    exit 1;
  }
if (!chdir ($directory))
  {
    print "Could not change to directory \"$directory\": $!\n";
    exit 1;
  }

print "Submitting $realNJobs jobs to run on $realMaxEvents events in $nFiles files.\n" if $realMaxEvents >= 0;
print "Submitting $realNJobs jobs to run on all events in $nFiles files.\n" if $realMaxEvents < 0;
if (system ("LD_LIBRARY_PATH=/usr/lib64/condor:\$LD_LIBRARY_PATH condor_submit condor.sub") != 0)
  {
    print "condor_submit failed!\n";
    exit 1;
  }
72
# Writes the per-job CMSSW configuration "config_cfg.py" into the working
# directory. The generated Python wraps the user's configuration
# (userConfig_cfg.py), appends the job number to the TFileService output file
# name, and optionally overrides the input files, the number of events and
# the process name.
#
# Arguments:
#   $workingDir   - directory in which config_cfg.py is created
#   $dataset      - if true, the input files are taken from osusub.runList
#   $nEvents      - total number of events requested; accepted for interface
#                   compatibility but not used
#   $eventsPerJob - events each job should process; a negative value means
#                   "all events"
#   $process      - optional suffix for the CMSSW process name; every
#                   non-alphanumeric character is removed from it
#
# Dies if the configuration file cannot be written.
sub
outputPset
{
  my $workingDir = shift;
  my $dataset = shift;
  my $nEvents = shift;
  my $eventsPerJob = shift;
  my $process = shift;

  $process = "" if !defined ($process);
  $process =~ s/[^[:alnum:]]//g;

  my $psetName = "$workingDir/config_cfg.py";
  open (my $pset, ">", $psetName) or die "Could not open \"$psetName\" for writing: $!\n";

  # q{} keeps the Python regular expression free of Perl escaping.
  my @lines = (
    q{import FWCore.ParameterSet.Config as cms},
    q{import OSUT3Analysis.DBTools.osusub_cfg as osusub},
    q{import re},
    q{import userConfig_cfg as pset},
    q{},
    q{fileName = pset.process.TFileService.fileName},
    q{fileName = fileName.pythonValue ()},
    q{fileName = fileName[1:(len (fileName) - 1)]},
    q{fileName = re.sub (r'^(.*)\.([^\.]*)$', r'\1_' + str (osusub.jobNumber) + r'.\2', fileName)},
    q{pset.process.TFileService.fileName = fileName},
    q{},
  );

  push (@lines, "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)") if $dataset;

  # With a dataset the event limit is always written, even when it is
  # negative; without one the user's own maxEvents setting is only replaced
  # when an explicit, non-negative limit was requested.
  push (@lines, "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)") if $dataset || $eventsPerJob >= 0;

  push (@lines, "process = pset.process");
  push (@lines, "process.setName_ (process.name_ () + '$process')") if $process;

  print {$pset} map { "$_\n" } @lines;

  close ($pset) or die "Could not write \"$psetName\": $!\n";
}
108
# Writes "runList.py", the Python list of input ROOT files, into the working
# directory and returns the number of input files.
#
# Arguments:
#   $dataset    - one of: the name of a dataset registered in the ntuple
#                 database, a directory containing ROOT files, or a text file
#                 listing one ROOT file per line; if empty nothing is written
#                 and 0 is returned
#   $workingDir - directory in which runList.py (and, when the database
#                 provides a cross section, crossSectionInPicobarn.txt) is
#                 created
#
# Only files whose names end in ".root" are put into the run list. For a
# directory or a list file the returned count is the number of files actually
# listed. For a database dataset the count recorded in the database is
# returned, after asking the user for confirmation if it differs from the
# number of directory entries.
# NOTE(review): the database count is compared against all directory entries,
# not only the ROOT files -- confirm which of the two the database records.
#
# Exits with status 1 if the user declines to continue or the dataset's
# location does not exist; dies if a file or directory cannot be opened.
sub
outputRunList
{
  my $dataset = shift;
  my $workingDir = shift;

  return 0 if !$dataset;

  # Prints a warning and terminates the program unless the user answers "y".
  my $confirmOrExit = sub
    {
      my $warning = shift;

      print "$warning\n";
      print "Continue anyway? (y/N): ";
      my $response = <STDIN>;
      $response = "" if !defined ($response);  # EOF counts as "no"
      $response =~ s/[\r\n]//g;
      exit 1 if lc ($response) ne "y";
    };

  my $isLocation = (-d $dataset) ? 1 : 0;
  my $isRunList = (-f $dataset) ? 1 : 0;
  my $isDatabase = !$isLocation && !$isRunList;

  my ($location, $nFiles, $status, $crossSection);
  $location = $dataset if $isLocation;
  if ($isDatabase)
    {
      ($location, $nFiles, $status, $crossSection) = getLocation ($dataset);
      $confirmOrExit->("This dataset is not marked as present on the Tier 3!") if $status ne "present";
      if (!(-e $location))
        {
          print "The database does not know where this dataset is!\n";
          exit 1;
        }
    }

  my @rootFiles;
  my $nEntries = 0;
  if ($isRunList)
    {
      open (my $list, "<", $dataset) or die "Could not open \"$dataset\": $!\n";
      while (my $file = <$list>)
        {
          # Strip the line terminator and surrounding blanks; otherwise the
          # newline would end up inside the quoted file name.
          $file =~ s/^\s+|\s+$//g;
          next if $file eq "";
          next if $file !~ m/\.root$/;
          my $path = abs_path ($file);
          $path = $file if !defined ($path);
          push (@rootFiles, $path);
        }
      close ($list);
    }
  else
    {
      opendir (my $dir, $location) or die "Could not open directory \"$location\": $!\n";
      my @entries = grep { $_ ne "." && $_ ne ".." } readdir ($dir);
      closedir ($dir);
      $nEntries = @entries;
      foreach my $entry (@entries)
        {
          next if $entry !~ m/\.root$/;
          my $path = abs_path ("$location/$entry");
          $path = "$location/$entry" if !defined ($path);
          push (@rootFiles, $path);
        }
    }

  if ($isDatabase)
    {
      $confirmOrExit->("Number of files does not match database entry!") if $nEntries != $nFiles;
    }
  else
    {
      $nFiles = @rootFiles;
    }

  my $runListName = "$workingDir/runList.py";
  open (my $runList, ">", $runListName) or die "Could not open \"$runListName\" for writing: $!\n";
  print {$runList} "runList = [\n";
  print {$runList} join (",\n", map { "'file:$_'" } @rootFiles), "\n" if @rootFiles;
  print {$runList} "]";
  close ($runList) or die "Could not write \"$runListName\": $!\n";

  if ($crossSection && $crossSection >= 0.0)
    {
      my $crossSectionName = "$workingDir/crossSectionInPicobarn.txt";
      open (my $out, ">", $crossSectionName) or die "Could not open \"$crossSectionName\" for writing: $!\n";
      print {$out} "$crossSection\n";
      close ($out) or die "Could not write \"$crossSectionName\": $!\n";
    }

  return $nFiles;
}
189
# Looks up a dataset in the "ntuple" table of the Tier 3 database and returns
# the list (location, nFiles, status, crossSection) for it.
#
# Arguments:
#   $dataset - dataset name or fragment; "*" acts as a wildcard and the
#              pattern is matched anywhere within the registered name
#
# The returned cross section is the higher-order value when that is greater
# than zero and the plain cross section otherwise. If several datasets match,
# they are listed and the user is asked to pick one.
#
# Exits with status 1 if the query fails, nothing matches, or the user's
# selection is invalid.
sub
getLocation
{
  my $dataset = shift;

  # Build the LIKE pattern. Quotes and backslashes are escaped so that the
  # dataset name cannot break out of the SQL string literal.
  my $queryDataset = $dataset;
  $queryDataset =~ s/([\\'])/\\$1/g;
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";
  my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn,higherOrderCrossSectionInPicobarn from ntuple where dataset like '$queryDataset' order by creationTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);
  if (!$results)
    {
      print "Could not query the Tier 3 database!\n";
      exit 1;
    }

  # Reduces a result row to [location, nFiles, status, crossSection], using
  # the same cross section rule whether one or many rows matched.
  my $extract = sub
    {
      my @row = @_;

      my $higherOrder = defined ($row[7]) ? $row[7] + 0 : 0;
      return [$row[3], $row[4], $row[5], ($higherOrder > 0 ? $higherOrder : $row[6])];
    };

  my $nRows = $results->numrows ();
  if ($nRows == 0)
    {
      print "Dataset does not exist on the Tier 3!\n";
      exit 1;
    }
  if ($nRows == 1)
    {
      my @row = $results->fetchrow ();
      return @{$extract->(@row)};
    }

  my %map;
  print "Found multiple datasets matching\n";
  print "\"$dataset\":\n";
  for (my $i = 1; $i <= $nRows; $i++)
    {
      my @row = $results->fetchrow ();
      $row[2] =~ s/([^ ]*) [^ ]*/$1/g;  # keep the date, drop the time
      $map{"$i"} = $extract->(@row);
      # The name is passed as an argument so that a "%" in it is not
      # interpreted as a format directive.
      printf ("(%2d) %s\n", $i, $row[0]);
      print " (";
      print color ("green") if $row[5] eq "present";
      print color ("bold yellow") if $row[5] eq "submitted";
      print color ("bold red") if $row[5] eq "created" or $row[5] eq "cancelled" or $row[5] eq "deprecated";
      print $row[5];
      print color ("reset");
      print ") created by $row[1] on $row[2]\n";
    }
  print "\nWhich dataset would you like to use?: ";
  my $response = <STDIN>;
  $response = "" if !defined ($response);  # EOF is an invalid selection
  $response =~ s/\s//g;
  if (!(exists $map{$response}))
    {
      print "Your selection \"$response\" was not a valid option! Quitting.\n";
      exit 1;
    }

  return @{$map{$response}};
}
243
# Writes the Condor submit description file "condor.sub" into the working
# directory.
#
# Arguments:
#   $condorFileName - path of a template submit file; if it exists, the
#                     placeholders $cmsRun, $nJobs, $dataset and $label in it
#                     are replaced, otherwise a built-in default is used
#   $workingDir     - directory in which condor.sub is created
#   $nJobs          - number of jobs to queue
#   $dataset        - dataset name passed to the jobs ("NULL" if empty)
#   $label          - dataset label passed to the jobs ("NULL" if empty)
#
# Dies if the template cannot be read or condor.sub cannot be written.
sub
outputCondor
{
  my $condorFileName = shift;
  my $workingDir = shift;
  my $nJobs = shift;
  my $dataset = shift;
  my $label = shift;

  # `which` terminates its output with a newline, which must not end up in
  # the middle of the submit file.
  my $cmsRun = `which cmsRun`;
  $cmsRun = "" if !defined ($cmsRun);
  chomp ($cmsRun);

  my %value = (
    "cmsRun" => $cmsRun,
    "nJobs" => $nJobs,
    "dataset" => ($dataset ? $dataset : "NULL"),
    "label" => ($label ? $label : "NULL"),
  );

  my $condorFile = "";
  if (!(-e $condorFileName))
    {
      $condorFile .= "Executable = $value{'cmsRun'}\n";
      $condorFile .= "Universe = vanilla\n";
      $condorFile .= "Getenv = True\n";
      $condorFile .= "Arguments = config_cfg.py True $value{'nJobs'} \$(Process) $value{'dataset'} $value{'label'}\n";
      $condorFile .= "\n";
      $condorFile .= "Output = condor_\$(Process).out\n";
      $condorFile .= "Error = condor_\$(Process).err\n";
      $condorFile .= "Log = condor_\$(Process).log\n";
      $condorFile .= "\n";
      $condorFile .= "+IsLocalJob = true\n";
      $condorFile .= "Rank = TARGET.IsLocalSlot\n";
      $condorFile .= "\n";
      $condorFile .= "Queue $value{'nJobs'}\n";
    }
  else
    {
      open (my $template, "<", $condorFileName) or die "Could not open \"$condorFileName\": $!\n";
      $condorFile = do { local $/; <$template> };
      close ($template);
      $condorFile = "" if !defined ($condorFile);
      # All placeholders are replaced in a single pass so that substituted
      # text is never itself scanned for placeholders.
      $condorFile =~ s/\$(cmsRun|nJobs|dataset|label)/$value{$1}/g;
    }

  my $subName = "$workingDir/condor.sub";
  open (my $sub, ">", $subName) or die "Could not open \"$subName\" for writing: $!\n";
  print {$sub} $condorFile;
  close ($sub) or die "Could not write \"$subName\": $!\n";
}
297
# Prints the usage message to standard output and terminates the program.
# Takes no arguments and never returns.
sub
printHelp
{
  # Show only the file name of the script, not the path it was invoked with.
  my $exeName = $0;
  $exeName =~ s/^.*\/([^\/]*)$/$1/;

  print "Usage: $exeName [OPTION]... [DATASET | LOCATION | LIST] CONFIG DIRECTORY NJOBS\n";
  print "Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor.\n";
  print "\n";
  printf ("%-29s%s\n", " -d, --dataset DATASET", "override the dataset name");
  printf ("%-29s%s\n", " -h, --help", "print this help message");
  printf ("%-29s%s\n", " -l, --label LABEL", "give the dataset a short label");
  printf ("%-29s%s\n", " -m, --maxEvents N", "only run over N events in the dataset; default is");
  # Continuation of the --maxEvents description; it has to follow that
  # option directly.
  printf ("%-29s%s\n", "", "to run over all events");
  printf ("%-29s%s\n", " -p, --process PROCESS", "suffix for the process name");
  print "\n";
  print "The optional first argument must be either a DATASET registered in the Tier 3\n";
  print "ntuple database, a LOCATION which exists on disk, or a text file containing a\n";
  print "LIST of files to run over. CONFIG must be a valid CMSSW python configuration\n";
  print "which can be used with cmsRun. DIRECTORY is a working directory that is created\n";
  print "and in which all output, both from the CMSSW jobs and from Condor, is placed.\n";
  print "Finally, NJOBS is the number of Condor jobs that will be created.\n";

  exit;
}