ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osusub
Revision: 1.20
Committed: Wed Jun 19 13:40:44 2013 UTC (11 years, 10 months ago) by ahart
Branch: MAIN
CVS Tags: V02-03-00
Changes since 1.19: +37 -20 lines
Log Message:
Added "-d" and "-p" options which can be used to override the dataset name that is subsequently picked up by osusub_cfg.py and to add a suffix to the process name of the CMSSW job, respectively.

File Contents

# User Rev Content
1 ahart 1.1 #!/usr/bin/env perl
2    
3     use strict;
4     use Mysql;
5     use File::Copy;
6     use Getopt::Long;
7 ahart 1.5 use POSIX;
8 ahart 1.18 use Term::ANSIColor;
9 ahart 1.20 use Cwd 'abs_path';
10 ahart 1.2
11 ahart 1.1 sub printHelp;
12     sub outputPset;
13     sub outputRunList;
14     sub getLocation;
15     sub outputCondor;
16    
# Connect to the Tier 3 ntuple database.  The handle is a package global
# because getLocation () reads it.
our $db = Mysql->connect ("cmshead", "ntuple", "osuT3User");

# Parse the command line; single-letter options may be bundled.  An unknown
# or malformed option shows the usage message instead of being ignored.
my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "dataset|d=s", "label|l=s", "maxEvents|m=s", "process|p=s", "help|h") or printHelp ();
my $argc = @ARGV;

printHelp () if $opt{"help"};
printHelp () if $argc != 3 && $argc != 4;

# With four positional arguments the first one names the input (a dataset, a
# directory or a run list); with three there is no input dataset.
my $dataset = "";
$dataset = shift (@ARGV) if $argc == 4;
my ($config, $directory, $nJobs) = @ARGV;

# NJOBS is used as a divisor below, so reject anything but a positive integer
# before any file or directory is created.
if ($nJobs !~ /^[1-9][0-9]*$/)
  {
    print "NJOBS must be a positive integer (got \"$nJobs\")!\n";
    exit 1;
  }
if (!(-e $config))
  {
    print "\"$config\" does not exist!\n";
    exit;
  }
if (-e $directory)
  {
    print "Directory \"$directory\" already exists!\n";
    print "Please delete it or specify another working directory.\n";
    exit;
  }
mkdir ($directory) or die "Cannot create directory \"$directory\": $!\n";

my $nFiles = outputRunList ($dataset, $directory);

# Spread the files evenly: with ceil (nFiles / nJobs) files per job, fewer
# jobs than requested may be enough to cover all of the files.
my $realNJobs = $nJobs;
$realNJobs = ceil ($nFiles / ceil ($nFiles / $realNJobs)) if $nFiles;

# An events-per-job value of -1 means "run over all events".
my $eventsPerJob = -1;
$eventsPerJob = ceil ($opt{"maxEvents"} / $realNJobs) if $opt{"maxEvents"} && $opt{"maxEvents"} >= 0;
my $realMaxEvents = $eventsPerJob * $realNJobs;

# The wrapper configuration is written using the positional dataset; the -d
# override only changes the name that is passed on to the Condor jobs.
outputPset ($directory, $dataset, $opt{"maxEvents"}, $eventsPerJob, $opt{"process"});
$dataset = $opt{"dataset"} if $opt{"dataset"};
outputCondor ("$ENV{'CMSSW_BASE'}/src/OSUT3Analysis/DBTools/data/condor.sub", $directory, $realNJobs, $dataset, $opt{"label"});
copy ($config, "$directory/userConfig_cfg.py") or die "Cannot copy \"$config\" to \"$directory/userConfig_cfg.py\": $!\n";
chdir ($directory) or die "Cannot change to directory \"$directory\": $!\n";

print "Submitting $realNJobs jobs to run on $realMaxEvents events in $nFiles files.\n" if $realMaxEvents >= 0;
print "Submitting $realNJobs jobs to run on all events in $nFiles files.\n" if $realMaxEvents < 0;

# Report a failed submission through the exit status instead of ending
# silently with success.
my $submitStatus = system ("LD_LIBRARY_PATH=/usr/lib64/condor:\$LD_LIBRARY_PATH condor_submit condor.sub");
if ($submitStatus != 0)
  {
    print "condor_submit failed!\n";
    exit 1;
  }
70 ahart 1.1
# Writes "config_cfg.py" into the working directory.  The generated Python
# imports the user's configuration (userConfig_cfg), appends the job number
# to the TFileService output file name and optionally overrides the input
# files, the maximum number of events and the process name.
#
# Arguments:
#   $workingDir    directory in which config_cfg.py is created
#   $dataset       if true, the input files are taken from osusub.runList
#   $nEvents       accepted for interface compatibility; not used here
#   $eventsPerJob  value for maxEvents.input; negative means all events
#   $process       optional suffix appended to the CMSSW process name
#
# Dies if the file cannot be written.
sub
outputPset
{
  my $workingDir = shift;
  my $dataset = shift;
  my $nEvents = shift;
  my $eventsPerJob = shift;
  my $process = shift;

  # Only alphanumeric characters are kept in the process name suffix.
  $process = "" if !defined $process;
  $process =~ s/[^[:alnum:]]//g;

  my $psetFileName = "$workingDir/config_cfg.py";
  open (my $pset, ">", $psetFileName) or die "Cannot open \"$psetFileName\" for writing: $!\n";

  print $pset "import FWCore.ParameterSet.Config as cms\n";
  print $pset "import OSUT3Analysis.DBTools.osusub_cfg as osusub\n";
  print $pset "import re\n";
  print $pset "import userConfig_cfg as pset\n";
  print $pset "\n";
  # The generated Python strips the quotes from the file name and inserts
  # "_<jobNumber>" before the extension.
  print $pset "fileName = pset.process.TFileService.fileName\n";
  print $pset "fileName = fileName.pythonValue ()\n";
  print $pset "fileName = fileName[1:(len (fileName) - 1)]\n";
  print $pset "fileName = re.sub (r'^(.*)\\.([^\\.]*)\$', r'\\1_' + str (osusub.jobNumber) + r'.\\2', fileName)\n";
  print $pset "pset.process.TFileService.fileName = fileName\n";
  print $pset "\n";
  print $pset "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n" if $dataset;
  # With a dataset, maxEvents is always set (possibly to a negative value);
  # without one, the user's own setting is only overridden by an explicit
  # non-negative limit.
  print $pset "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)\n" if $dataset || $eventsPerJob >= 0;
  print $pset "process = pset.process\n";
  print $pset "process.setName_ (process.name_ () + '$process')\n" if $process;

  close ($pset) or die "Cannot write \"$psetFileName\": $!\n";
}
106    
# Writes "runList.py", a Python list of the input files, into the working
# directory and returns the number of input files.
#
# $dataset may be:
#   - empty: nothing is written and 0 is returned;
#   - an existing directory: every entry in it (except "." and "..") is used;
#   - an existing plain file: each non-blank line is taken as a file name;
#   - anything else: a dataset name that is looked up with getLocation ().
#
# For a database dataset the returned count is the one stored in the
# database, and the cross section (if known and non-negative) is written to
# "crossSectionInPicobarn.txt" in the working directory.  May prompt on STDIN
# and exit if the database entry looks inconsistent; dies if a directory or
# file cannot be read or written.
sub
outputRunList
{
  my $dataset = shift;
  my $workingDir = shift;

  return 0 if !$dataset;
  my $location;
  my $nFiles;
  my $status;
  my $crossSection;
  my $isLocation = (-d $dataset) ? 1 : 0;
  my $isRunList = (-f $dataset) ? 1 : 0;
  $location = $dataset if $isLocation;
  if (!$isLocation && !$isRunList)
    {
      ($location, $nFiles, $status, $crossSection) = getLocation ($dataset);
      if ($status ne "present")
        {
          print "This dataset is not marked as present on the Tier 3!\n";
          print "Continue anyway? (y/N): ";
          my $response = <STDIN>;
          $response = "" if !defined $response;
          $response =~ s/\n//g;
          exit if !$response || lc ($response) ne "y";
        }
      if (!(-e $location))
        {
          print "The database does not know where this dataset is!\n";
          exit;
        }
    }

  # Collect the input files as paths.  "." and ".." are dropped right away so
  # that the count below is the real number of files in every mode.
  my @files;
  if (!$isRunList)
    {
      opendir (my $dir, $location) or die "Cannot read directory \"$location\": $!\n";
      @files = map { "$location/$_" } grep { $_ ne "." && $_ ne ".." } readdir ($dir);
      closedir ($dir);
    }
  else
    {
      open (my $list, "<", $dataset) or die "Cannot read run list \"$dataset\": $!\n";
      while (my $file = <$list>)
        {
          # Strip the line terminator and surrounding blanks; skip empty lines.
          $file =~ s/^\s+|\s+$//g;
          next if $file eq "";
          push (@files, $file);
        }
      close ($list);
    }

  # Only a database entry carries its own file count to compare against.
  $nFiles = @files if $isLocation || $isRunList;
  if (@files != $nFiles)
    {
      print "Number of files does not match database entry!\n";
      print "Continue anyway? (y/N): ";
      my $response = <STDIN>;
      $response = "" if !defined $response;
      $response =~ s/\n//g;
      exit if !$response || lc ($response) ne "y";
    }

  my @entries;
  foreach my $file (@files)
    {
      # abs_path can return undef for a path it cannot resolve; keep the
      # name as given in that case rather than writing an empty entry.
      my $path = abs_path ($file);
      $path = $file if !defined $path;
      push (@entries, "'file:$path'");
    }

  my $runListFileName = "$workingDir/runList.py";
  open (my $runList, ">", $runListFileName) or die "Cannot open \"$runListFileName\" for writing: $!\n";
  print $runList "runList = [\n";
  print $runList join (",\n", @entries), "\n" if @entries;
  print $runList "]";
  close ($runList) or die "Cannot write \"$runListFileName\": $!\n";

  if ($crossSection && $crossSection >= 0.0)
    {
      my $crossSectionFileName = "$workingDir/crossSectionInPicobarn.txt";
      open (my $out, ">", $crossSectionFileName) or die "Cannot open \"$crossSectionFileName\" for writing: $!\n";
      print $out "$crossSection\n";
      close ($out) or die "Cannot write \"$crossSectionFileName\": $!\n";
    }

  return $nFiles;
}
186    
# Looks up a dataset in the "ntuple" table of the Tier 3 database and returns
# the list (location, nFiles, status, crossSection) for it.
#
# The name is matched as a substring, and "*" in the name acts as a wildcard.
# If several datasets match, they are listed and the user is asked on STDIN to
# pick one.  The cross section returned is the higher-order one when that
# column holds a true value, otherwise the regular one.
#
# Prints a message and exits if the database is unavailable, if nothing
# matches or if the user's selection is invalid.
sub
getLocation
{
  my $dataset = shift;

  # Database handle opened at file scope by the main script.
  our $db;
  if (!$db)
    {
      print "Could not connect to the Tier 3 database!\n";
      exit;
    }

  # Turn the name into a LIKE pattern and let the database layer quote it,
  # so that quotes in the name cannot break or alter the query.
  my $queryDataset = $dataset;
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";
  my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn,higherOrderCrossSectionInPicobarn from ntuple where dataset like " . $db->quote ($queryDataset) . " order by creationTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);
  if (!$results)
    {
      print "The database query failed!\n";
      exit;
    }

  # Picks (location, nFiles, status, cross section) out of a result row.
  my $extract = sub
    {
      my @row = @_;
      my $crossSection = $row[7] ? $row[7] : $row[6];
      return ($row[3], $row[4], $row[5], $crossSection);
    };

  my $nRows = $results->numrows ();
  if ($nRows == 0)
    {
      print "Dataset does not exist on the Tier 3!\n";
      exit;
    }
  return $extract->($results->fetchrow ()) if $nRows == 1;

  my %map;
  print "Found multiple datasets matching\n";
  print "\"$dataset\":\n";
  for (my $i = 1; $i <= $nRows; $i++)
    {
      my @row = $results->fetchrow ();
      # Keep only the first space-separated field of the creation time.
      $row[2] =~ s/([^ ]*) [^ ]*/$1/g;
      $map{"$i"} = [$extract->(@row)];
      printf "(%2d) %s\n", $i, $row[0];
      print " (";
      print color ("green") if $row[5] eq "present";
      print color ("bold yellow") if $row[5] eq "submitted";
      print color ("bold red") if $row[5] eq "created" or $row[5] eq "cancelled" or $row[5] eq "deprecated";
      print $row[5];
      print color ("reset");
      print ") created by $row[1] on $row[2]\n";
    }
  print "\nWhich dataset would you like to use?: ";
  my $response = <STDIN>;
  $response = "" if !defined $response;
  $response =~ s/[ \t\n]//g;
  if (!(exists $map{$response}))
    {
      print "Your selection \"$response\" was not a valid option! Quitting.\n";
      exit;
    }

  return @{$map{$response}};
}
239    
# Writes the Condor submit description "condor.sub" into the working
# directory.
#
# If the template $condorFileName exists, its contents are used with the
# placeholders $cmsRun, $nJobs, $dataset and $label replaced; otherwise a
# built-in description is generated.  An empty dataset or label is passed on
# as the literal string NULL.
#
# Arguments:
#   $condorFileName  path of the submit description template
#   $workingDir      directory in which condor.sub is created
#   $nJobs           number of jobs to queue
#   $dataset         dataset name handed to each job
#   $label           short dataset label handed to each job
#
# Dies if the template cannot be read or condor.sub cannot be written.
sub
outputCondor
{
  my $condorFileName = shift;
  my $workingDir = shift;
  my $nJobs = shift;
  my $dataset = shift;
  my $label = shift;

  # Locate cmsRun on the PATH.  The line terminator is removed so that no
  # stray newline ends up in the submit description.
  my $cmsRun = `which cmsRun`;
  $cmsRun = "" if !defined $cmsRun;
  chomp ($cmsRun);

  my $datasetArg = $dataset ? $dataset : "NULL";
  my $labelArg = $label ? $label : "NULL";
  my $condorFile = "";

  if (!(-e $condorFileName))
    {
      $condorFile .= "Executable = $cmsRun\n";
      $condorFile .= "Universe = vanilla\n";
      $condorFile .= "Getenv = True\n";
      $condorFile .= "Arguments = config_cfg.py True $nJobs \$(Process) $datasetArg $labelArg\n";
      $condorFile .= "\n";
      $condorFile .= "Output = condor_\$(Process).out\n";
      $condorFile .= "Error = condor_\$(Process).err\n";
      $condorFile .= "Log = condor_\$(Process).log\n";
      $condorFile .= "\n";
      $condorFile .= "+IsLocalJob = true\n";
      $condorFile .= "Rank = TARGET.IsLocalSlot\n";
      $condorFile .= "\n";
      $condorFile .= "Queue $nJobs\n";
    }
  else
    {
      open (my $template, "<", $condorFileName) or die "Cannot read \"$condorFileName\": $!\n";
      $condorFile = join ("", <$template>);
      close ($template);
      $condorFile =~ s/\$cmsRun/$cmsRun/g;
      $condorFile =~ s/\$nJobs/$nJobs/g;
      $condorFile =~ s/\$dataset/$datasetArg/g;
      $condorFile =~ s/\$label/$labelArg/g;
    }

  my $subFileName = "$workingDir/condor.sub";
  open (my $sub, ">", $subFileName) or die "Cannot open \"$subFileName\" for writing: $!\n";
  print $sub $condorFile;
  close ($sub) or die "Cannot write \"$subFileName\": $!\n";
}
293    
# Prints the usage message to standard output and exits.  The program name
# shown is the base name of $0.
sub
printHelp
{
  my $exeName = $0;
  $exeName =~ s/^.*\/([^\/]*)$/$1/;

  print "Usage: $exeName [OPTION]... [DATASET | LOCATION | LIST] CONFIG DIRECTORY NJOBS\n";
  print "Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor.\n";
  print "\n";
  printf "%-29s%s\n", " -d, --dataset DATASET", "override the dataset name";
  printf "%-29s%s\n", " -h, --help", "print this help message";
  printf "%-29s%s\n", " -l, --label LABEL", "give the dataset a short label";
  # The description of -m spans two lines; the continuation must directly
  # follow it, before the next option.
  printf "%-29s%s\n", " -m, --maxEvents N", "only run over N events in the dataset; default is";
  printf "%-29s%s\n", " ", "to run over all events";
  printf "%-29s%s\n", " -p, --process PROCESS", "suffix for the process name";
  print "\n";
  print "The optional first argument must be either a DATASET registered in the Tier 3\n";
  print "ntuple database, a LOCATION which exists on disk, or a text file containing a\n";
  print "LIST of files to run over. CONFIG must be a valid CMSSW python configuration\n";
  print "which can be used with cmsRun. DIRECTORY is a working directory that is created\n";
  print " and in which all output, both from the CMSSW jobs and from Condor, is placed.\n";
  print "Finally, NJOBS is the number of Condor jobs that will be created.\n";

  exit;
}