ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osusub
Revision: 1.22
Committed: Tue Jul 9 17:23:14 2013 UTC (11 years, 9 months ago) by ahart
Branch: MAIN
CVS Tags: V02-03-02, HEAD
Changes since 1.21: +1 -0 lines
Log Message:
Skip non-ROOT files when creating the run list.

File Contents

# User Rev Content
1 ahart 1.1 #!/usr/bin/env perl
2    
3     use strict;
4     use Mysql;
5     use File::Copy;
6     use Getopt::Long;
7 ahart 1.5 use POSIX;
8 ahart 1.18 use Term::ANSIColor;
9 ahart 1.20 use Cwd 'abs_path';
10 ahart 1.2
# Flush STDOUT after every write so that prompts and progress messages show up
# immediately.
select ((select (STDOUT), $| = 1)[0]);

sub printHelp;
sub outputPset;
sub outputRunList;
sub getLocation;
sub outputCondor;

# Database handle shared with getLocation ().
our $db = Mysql->connect ("cmshead", "ntuple", "osuT3User");

my %opt;
Getopt::Long::Configure ("bundling");
# An unrecognized or malformed option is a usage error; show the help text
# instead of silently ignoring it and submitting jobs anyway.
GetOptions (\%opt, "dataset|d=s", "label|l=s", "maxEvents|m=s", "process|p=s", "help|h") or printHelp ();
my $argc = @ARGV;

printHelp () if $opt{"help"};
printHelp () if $argc != 3 && $argc != 4;

# The leading DATASET | LOCATION | LIST argument is optional, so the remaining
# positional arguments shift by one depending on the argument count.
my $dataset = "";
my $config;
my $directory;
my $nJobs;
if ($argc == 3)
{
  ($config, $directory, $nJobs) = @ARGV;
}
else
{
  ($dataset, $config, $directory, $nJobs) = @ARGV;
}

# Validate the numeric arguments before anything is created on disk; NJOBS is
# used as a divisor below, so zero or a non-number must be rejected.
if ($nJobs !~ m/\A[0-9]+\z/ || $nJobs < 1)
{
  print "NJOBS must be a positive integer, not \"$nJobs\"!\n";
  exit 1;
}
if (defined $opt{"maxEvents"} && $opt{"maxEvents"} !~ m/\A-?[0-9]+\z/)
{
  print "The argument of --maxEvents must be an integer, not \"$opt{'maxEvents'}\"!\n";
  exit 1;
}

if (!(-e $config))
{
  print "\"$config\" does not exist!\n";
  exit 1;
}
if (-e $directory)
{
  print "Directory \"$directory\" already exists!\n";
  print "Please delete it or specify another working directory.\n";
  exit 1;
}
if (!mkdir ($directory))
{
  print "Could not create directory \"$directory\": $!\n";
  exit 1;
}

my $nFiles = outputRunList ($dataset, $directory);

# Reduce the number of jobs to the smallest count that still gives every job
# the same ceiling of files per job. Only done when there is a positive
# number of input files; otherwise the requested NJOBS is used as is.
my $realNJobs = $nJobs;
$realNJobs = ceil ($nFiles / ceil ($nFiles / $nJobs)) if $nFiles && $nFiles > 0;

# A value of -1 means "no limit"; it is kept when --maxEvents is absent, zero
# or negative.
my $eventsPerJob = -1;
$eventsPerJob = ceil ($opt{"maxEvents"} / $realNJobs) if $opt{"maxEvents"} && $opt{"maxEvents"} >= 0;
my $realMaxEvents = $eventsPerJob * $realNJobs;

outputPset ($directory, $dataset, $opt{"maxEvents"}, $eventsPerJob, $opt{"process"});
# The --dataset override only affects the name handed to Condor, so it is
# applied after the run list and the configuration have been written.
$dataset = $opt{"dataset"} if $opt{"dataset"};
outputCondor ("$ENV{'CMSSW_BASE'}/src/OSUT3Analysis/DBTools/data/condor.sub", $directory, $realNJobs, $dataset, $opt{"label"});
if (!copy ($config, "$directory/userConfig_cfg.py"))
{
  print "Could not copy \"$config\" to \"$directory/userConfig_cfg.py\": $!\n";
  exit 1;
}
if (!chdir ($directory))
{
  print "Could not change to directory \"$directory\": $!\n";
  exit 1;
}
print "Submitting $realNJobs jobs to run on $realMaxEvents events in $nFiles files.\n" if $realMaxEvents >= 0;
print "Submitting $realNJobs jobs to run on all events in $nFiles files.\n" if $realMaxEvents < 0;
my $submitStatus = system ("LD_LIBRARY_PATH=/usr/lib64/condor:\$LD_LIBRARY_PATH condor_submit condor.sub");
# Propagate a failed submission to the caller through the exit status.
exit 1 if $submitStatus != 0;
72 ahart 1.1
# Writes "config_cfg.py" into the working directory. The generated Python
# file imports the user's configuration (userConfig_cfg) and adjusts it for a
# single job: the TFileService output name gets the job number appended, the
# input files and event limit are set, and the process name optionally gets a
# suffix.
#
# Arguments (positional):
#   $workingDir    directory in which config_cfg.py is created
#   $dataset       if true, the job reads its input files from osusub.runList
#   $nEvents       total number of events requested (currently unused)
#   $eventsPerJob  events per job; a negative value means no limit
#   $process       optional suffix for the process name; every character that
#                  is not alphanumeric is removed
#
# Dies if the file cannot be written.
sub
outputPset
{
  my $workingDir = shift;
  my $dataset = shift;
  my $nEvents = shift;
  my $eventsPerJob = shift;
  my $process = shift;

  $eventsPerJob = -1 if !defined $eventsPerJob;
  $process = "" if !defined $process;
  $process =~ s/[^[:alnum:]]//g;

  my $psetName = "$workingDir/config_cfg.py";
  open (my $pset, ">", $psetName) or die "Could not open \"$psetName\" for writing: $!\n";

  print $pset "import FWCore.ParameterSet.Config as cms\n";
  print $pset "import OSUT3Analysis.DBTools.osusub_cfg as osusub\n";
  print $pset "import re\n";
  print $pset "import userConfig_cfg as pset\n";
  print $pset "\n";
  # Insert "_<jobNumber>" in front of the extension of the output file name.
  print $pset "fileName = pset.process.TFileService.fileName\n";
  print $pset "fileName = fileName.pythonValue ()\n";
  print $pset "fileName = fileName[1:(len (fileName) - 1)]\n";
  print $pset "fileName = re.sub (r'^(.*)\\.([^\\.]*)\$', r'\\1_' + str (osusub.jobNumber) + r'.\\2', fileName)\n";
  print $pset "pset.process.TFileService.fileName = fileName\n";
  print $pset "\n";
  print $pset "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n" if $dataset;
  # With a dataset the event limit is always written (including the negative
  # "no limit" value); without one it is written only when a limit was set.
  print $pset "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)\n" if $dataset || $eventsPerJob >= 0;
  print $pset "process = pset.process\n";
  print $pset "process.setName_ (process.name_ () + '$process')\n" if $process;

  # Buffered write errors only surface when the handle is closed.
  close ($pset) or die "Could not write \"$psetName\": $!\n";
}
108    
# Writes "runList.py" (a Python list named runList with one 'file:<path>'
# entry per input ROOT file) into the working directory and returns the
# number of files in that list.
#
# $dataset is interpreted as, in order of precedence:
#   - a directory on disk: every entry ending in ".root" is used;
#   - a plain file: a text file listing one input file per line;
#   - anything else: a dataset name that is looked up with getLocation ().
# For a database dataset whose cross section is known, the cross section is
# also written to "crossSectionInPicobarn.txt".
#
# Arguments (positional):
#   $dataset     dataset name, directory or run list file; if false, nothing
#                is written and 0 is returned
#   $workingDir  directory in which the output files are created
#
# Exits if the user declines to continue after a warning or if the dataset
# location does not exist; dies if a file or directory cannot be opened.
sub
outputRunList
{
  my $dataset = shift;
  my $workingDir = shift;

  return 0 if !$dataset;

  # Asks the user whether to go on after a warning; anything but "y" quits.
  my $confirmOrExit = sub {
    print "Continue anyway? (y/N): ";
    my $response = <STDIN>;
    $response = "" if !defined $response;  # EOF on STDIN counts as "no"
    $response =~ s/\n//g;
    exit if !$response || lc ($response) ne "y";
  };

  my $location;
  my $nFiles;
  my $status;
  my $crossSection;
  my $isLocation = (-d $dataset) ? 1 : 0;
  my $isRunList = (-f $dataset) ? 1 : 0;
  $location = $dataset if $isLocation;
  if (!$isLocation && !$isRunList)
  {
    ($location, $nFiles, $status, $crossSection) = getLocation ($dataset);
    if (!defined $status || $status ne "present")
    {
      print "This dataset is not marked as present on the Tier 3!\n";
      $confirmOrExit->();
    }
    if (!defined $location || !(-e $location))
    {
      print "The database does not know where this dataset is!\n";
      exit 1;
    }
  }

  # Collect the absolute paths of the ROOT files only, so that the count
  # below agrees with what is actually written to the run list.
  my @files;
  if ($isRunList)
  {
    open (my $list, "<", $dataset) or die "Could not open \"$dataset\" for reading: $!\n";
    while (my $file = <$list>)
    {
      # Strip the line ending and surrounding blanks; otherwise the newline
      # would end up inside the quoted file name in runList.py.
      $file =~ s/^\s+|\s+$//g;
      next if $file eq "";
      next if $file !~ m/\.root$/;
      my $path = abs_path ($file);
      push (@files, defined $path ? $path : $file);
    }
    close ($list);
  }
  else
  {
    opendir (my $dir, $location) or die "Could not open directory \"$location\": $!\n";
    # Sorted so that the run list does not depend on the directory order.
    # "." and ".." never match the pattern and are skipped with the other
    # non-ROOT entries.
    foreach my $entry (sort (readdir ($dir)))
    {
      next if $entry !~ m/\.root$/;
      my $path = abs_path ("$location/$entry");
      push (@files, defined $path ? $path : "$location/$entry");
    }
    closedir ($dir);
  }

  # Only a database entry carries an expected number of files to check.
  my $nFound = @files;
  if (!$isLocation && !$isRunList && (!defined $nFiles || $nFound != $nFiles))
  {
    print "Number of files does not match database entry!\n";
    $confirmOrExit->();
  }
  # Report what was really found; the caller divides these files among jobs.
  $nFiles = $nFound;

  my $runListName = "$workingDir/runList.py";
  open (my $runList, ">", $runListName) or die "Could not open \"$runListName\" for writing: $!\n";
  print $runList "runList = [\n";
  print $runList join (",\n", map { "'file:$_'" } @files), "\n" if @files;
  print $runList "]";
  close ($runList) or die "Could not write \"$runListName\": $!\n";

  if ($crossSection && $crossSection >= 0.0)
  {
    my $crossSectionName = "$workingDir/crossSectionInPicobarn.txt";
    open (my $out, ">", $crossSectionName) or die "Could not open \"$crossSectionName\" for writing: $!\n";
    print $out "$crossSection\n";
    close ($out) or die "Could not write \"$crossSectionName\": $!\n";
  }

  return $nFiles;
}
189    
# Looks up a dataset in the "ntuple" table of the database behind the global
# $db handle and returns the list
#   (location, nFiles, status, crossSection)
# for it. The name is matched as a substring, and "*" in the name acts as a
# wildcard. The cross section is the higher-order one when that is a positive
# number and crossSectionInPicobarn otherwise. If several datasets match,
# they are listed and the user is asked to choose one.
#
# Arguments (positional):
#   $dataset  (part of) the dataset name
#
# Exits if the database cannot be queried, if no dataset matches, or if the
# user's selection is not one of the listed options.
sub
getLocation
{
  my $dataset = shift;

  if (!$db)
  {
    print "Could not connect to the Tier 3 database!\n";
    exit 1;
  }

  # Builds the returned fields from one result row. The higher-order cross
  # section is compared numerically, because strings such as "0.0" or "-1"
  # are true in Perl although they do not hold a usable value.
  my $fieldsOf = sub {
    my @row = @_;
    my $higherOrder = defined $row[7] ? $row[7] + 0 : 0;
    return [$row[3], $row[4], $row[5], ($higherOrder > 0 ? $higherOrder : $row[6])];
  };

  my $queryDataset = $dataset;
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";
  # quote () escapes the pattern and adds the surrounding quotes, so a
  # dataset name containing a quote cannot break or alter the query.
  my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn,higherOrderCrossSectionInPicobarn from ntuple where dataset like " . $db->quote ($queryDataset) . " order by creationTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);
  if (!$results)
  {
    print "The query to the Tier 3 database failed!\n";
    exit 1;
  }

  my $nRows = $results->numrows ();
  if ($nRows == 0)
  {
    print "Dataset does not exist on the Tier 3!\n";
    exit;
  }
  return @{$fieldsOf->($results->fetchrow ())} if $nRows == 1;

  # Several matches: print a numbered menu and remember each row's fields
  # under its menu number.
  my %map;
  print "Found multiple datasets matching\n";
  print "\"$dataset\":\n";
  for (my $i = 1; $i <= $nRows; $i++)
  {
    my @row = $results->fetchrow ();
    my $nameOfStatus = defined $row[5] ? $row[5] : "";
    # Keep only the part of the creation time in front of the blank.
    $row[2] =~ s/([^ ]*) [^ ]*/$1/g if defined $row[2];
    $map{"$i"} = $fieldsOf->(@row);
    printf "(%2d) $row[0]\n", $i;
    print " (";
    print color "green" if $nameOfStatus eq "present";
    print color "bold yellow" if $nameOfStatus eq "submitted";
    print color "bold red" if $nameOfStatus eq "created" or $nameOfStatus eq "cancelled" or $nameOfStatus eq "deprecated";
    print $nameOfStatus;
    print color "reset";
    print ") created by $row[1] on $row[2]\n";
  }
  print "\nWhich dataset would you like to use?: ";
  my $response = <STDIN>;
  $response = "" if !defined $response;  # EOF on STDIN is an invalid choice
  $response =~ s/[ \t\n]//g;
  if (!(exists $map{$response}))
  {
    print "Your selection \"$response\" was not a valid option! Quitting.\n";
    exit;
  }

  return @{$map{$response}};
}
243    
# Writes the Condor submit description "condor.sub" into the working
# directory. If the template $condorFileName exists, its placeholders
# $cmsRun, $nJobs, $dataset and $label are filled in; otherwise a default
# submit description is generated. An empty dataset or label is passed on as
# the literal string NULL.
#
# Arguments (positional):
#   $condorFileName  path of the template submit description
#   $workingDir      directory in which condor.sub is created
#   $nJobs           number of jobs to queue
#   $dataset         dataset name passed to the jobs
#   $label           short dataset label passed to the jobs
#
# Dies if the template cannot be read or condor.sub cannot be written.
sub
outputCondor
{
  my $condorFileName = shift;
  my $workingDir = shift;
  my $nJobs = shift;
  my $dataset = shift;
  my $label = shift;

  my $cmsRun = `which cmsRun`;
  $cmsRun = "" if !defined $cmsRun;
  # Drop the line ending of the command output; it would otherwise break the
  # line into which the path is inserted.
  chomp ($cmsRun);

  my $datasetArg = $dataset ? $dataset : "NULL";
  my $labelArg = $label ? $label : "NULL";
  my $condorFile = "";

  if (!(-e $condorFileName))
  {
    $condorFile .= "Executable = $cmsRun\n";
    $condorFile .= "Universe = vanilla\n";
    $condorFile .= "Getenv = True\n";
    $condorFile .= "Arguments = config_cfg.py True $nJobs \$(Process) $datasetArg $labelArg\n";
    $condorFile .= "\n";
    $condorFile .= "Output = condor_\$(Process).out\n";
    $condorFile .= "Error = condor_\$(Process).err\n";
    $condorFile .= "Log = condor_\$(Process).log\n";
    $condorFile .= "\n";
    $condorFile .= "+IsLocalJob = true\n";
    $condorFile .= "Rank = TARGET.IsLocalSlot\n";
    $condorFile .= "\n";
    $condorFile .= "Queue $nJobs\n";
  }
  else
  {
    open (my $template, "<", $condorFileName) or die "Could not open \"$condorFileName\" for reading: $!\n";
    $condorFile = join ("", <$template>);
    close ($template);
    $condorFile =~ s/\$cmsRun/$cmsRun/g;
    $condorFile =~ s/\$nJobs/$nJobs/g;
    $condorFile =~ s/\$dataset/$datasetArg/g;
    $condorFile =~ s/\$label/$labelArg/g;
  }

  my $subName = "$workingDir/condor.sub";
  open (my $sub, ">", $subName) or die "Could not open \"$subName\" for writing: $!\n";
  print $sub $condorFile;
  close ($sub) or die "Could not write \"$subName\": $!\n";
}
297    
# Prints the usage message to STDOUT and exits. The name shown in the usage
# line is the base name of the running script.
sub
printHelp
{
  my $exeName = $0;
  $exeName =~ s/^.*\/([^\/]*)$/$1/;

  print "Usage: $exeName [OPTION]... [DATASET | LOCATION | LIST] CONFIG DIRECTORY NJOBS\n";
  print "Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor.\n";
  print "\n";
  printf "%-29s%s\n", " -d, --dataset DATASET", "override the dataset name";
  printf "%-29s%s\n", " -h, --help", "print this help message";
  printf "%-29s%s\n", " -l, --label LABEL", "give the dataset a short label";
  # The description of --maxEvents spans two lines; keep them together.
  printf "%-29s%s\n", " -m, --maxEvents N", "only run over N events in the dataset; default is";
  printf "%-29s%s\n", " ", "to run over all events";
  printf "%-29s%s\n", " -p, --process PROCESS", "suffix for the process name";
  print "\n";
  print "The optional first argument must be either a DATASET registered in the Tier 3\n";
  print "ntuple database, a LOCATION which exists on disk, or a text file containing a\n";
  print "LIST of files to run over. CONFIG must be a valid CMSSW python configuration\n";
  print "which can be used with cmsRun. DIRECTORY is a working directory that is created\n";
  print "and in which all output, both from the CMSSW jobs and from Condor, is placed.\n";
  print "Finally, NJOBS is the number of Condor jobs that will be created.\n";

  exit;
}