ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osusub
(Generate patch)

Comparing UserCode/OSUT3Analysis/DBTools/scripts/osusub (file contents):
Revision 1.5 by ahart, Wed Aug 15 08:43:26 2012 UTC vs.
Revision 1.20 by ahart, Wed Jun 19 13:40:44 2013 UTC

# Line 5 | Line 5 | use Mysql;
5   use File::Copy;
6   use Getopt::Long;
7   use POSIX;
8 + use Term::ANSIColor;
9 + use Cwd 'abs_path';
10  
11   sub printHelp;
12   sub outputPset;
# Line 16 | Line 18 | our $db = Mysql->connect ("cmshead", "nt
18  
19   my %opt;
20   Getopt::Long::Configure ("bundling");
21 < GetOptions (\%opt, "help|h");
21 > GetOptions (\%opt, "dataset|d=s", "label|l=s", "maxEvents|m=s", "process|p=s", "help|h");
22   my $argc = @ARGV;
23  
24   printHelp () if $opt{"help"};
25 < printHelp () if $argc != 4;
26 < if (!(-e $ARGV[1]))
25 > printHelp () if $argc != 3 && $argc != 4;
26 > my $dataset;
27 > my $config;
28 > my $directory;
29 > my $nJobs;
30 > if ($argc == 3)
31    {
32 <    print "\"$ARGV[1]\" does not exist!\n";
32 >    $dataset = "";
33 >    $config = $ARGV[0];
34 >    $directory = $ARGV[1];
35 >    $nJobs = $ARGV[2];
36 >  }
37 > if ($argc == 4)
38 >  {
39 >    $dataset = $ARGV[0];
40 >    $config = $ARGV[1];
41 >    $directory = $ARGV[2];
42 >    $nJobs = $ARGV[3];
43 >  }
44 > if (!(-e $config))
45 >  {
46 >    print "\"$config\" does not exist!\n";
47      exit;
48    }
49 < if (-e $ARGV[2])
49 > if (-e $directory)
50    {
51 <    print "Directory \"$ARGV[2]\" already exists!\n";
51 >    print "Directory \"$directory\" already exists!\n";
52      print "Please delete it or specify another working directory.\n";
53      exit;
54    }
55 < mkdir $ARGV[2];
56 < my $nFiles = outputRunList ($ARGV[0], $ARGV[2]);
57 < my $nJobs = $ARGV[3];
58 < my $realNJobs = ceil ($nFiles / ceil ($nFiles / $nJobs));
59 < outputPset ($ARGV[2]);
60 < outputCondor ($ARGV[2], $realNJobs);
61 < copy ($ARGV[1], "$ARGV[2]/userConfig_cfg.py");
62 < chdir $ARGV[2];
63 < print "Submitting $realNJobs jobs to run on $nFiles files.\n";
64 < system ("condor_submit condor.sub");
55 > mkdir $directory;
56 > my $nFiles = outputRunList ($dataset, $directory);
57 > my $realNJobs = $nJobs;
58 > $realNJobs = ceil ($nFiles / ceil ($nFiles / $realNJobs)) if $nFiles;
59 > my $eventsPerJob = -1;
60 > $eventsPerJob = ceil ($opt{"maxEvents"} / $realNJobs) if $opt{"maxEvents"} && $opt{"maxEvents"} >= 0;
61 > my $realMaxEvents = $eventsPerJob * $realNJobs;
62 > outputPset ($directory, $dataset, $opt{"maxEvents"}, $eventsPerJob, $opt{"process"});
63 > $dataset = $opt{"dataset"} if $opt{"dataset"};
64 > outputCondor ("$ENV{'CMSSW_BASE'}/src/OSUT3Analysis/DBTools/data/condor.sub", $directory, $realNJobs, $dataset, $opt{"label"});
65 > copy ($config, "$directory/userConfig_cfg.py");
66 > chdir $directory;
67 > print "Submitting $realNJobs jobs to run on $realMaxEvents events in $nFiles files.\n" if $realMaxEvents >= 0;
68 > print "Submitting $realNJobs jobs to run on all events in $nFiles files.\n" if $realMaxEvents < 0;
69 > system ("LD_LIBRARY_PATH=/usr/lib64/condor:\$LD_LIBRARY_PATH condor_submit condor.sub");
70  
71   sub
72   outputPset
73   {
74    my $workingDir = shift;
75 +  my $dataset = shift;
76 +  my $nEvents = shift;
77 +  my $eventsPerJob = shift;
78 +  my $process = shift;
79 +
80 +  $process =~ s/[^[:alnum:]]//g;
81  
82    open (PSET, ">$workingDir/config_cfg.py");
83  
# Line 61 | Line 92 | outputPset
92    print PSET "fileName = re.sub (r'^(.*)\\.([^\\.]*)\$', r'\\1_' + str (osusub.jobNumber) + r'.\\2', fileName)\n";
93    print PSET "pset.process.TFileService.fileName = fileName\n";
94    print PSET "\n";
95 <  print PSET "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n";
96 <  print PSET "pset.process.maxEvents.input = cms.untracked.int32 (-1)\n";
95 >  if ($dataset)
96 >    {
97 >      print PSET "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n";
98 >      print PSET "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)\n" if $eventsPerJob < 0;
99 >    }
100 >  print PSET "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)\n" if $eventsPerJob >= 0;
101    print PSET "process = pset.process\n";
102 +  print PSET "process.setName_ (process.name_ () + '$process')\n" if $process;
103  
104    close (PSET);
105   }
# Line 74 | Line 110 | outputRunList
110    my $dataset = shift;
111    my $workingDir = shift;
112  
113 <  my ($location, $nFiles, $status) = getLocation ($dataset);
114 <  if ($status ne "present")
113 >  return 0 if !$dataset;
114 >  my $location;
115 >  my $nFiles;
116 >  my $status;
117 >  my $crossSection;
118 >  my $isLocation = 0;
119 >  my $isRunList = 0;
120 >  $isLocation = 1 if -d $dataset;
121 >  $isRunList = 1 if -f $dataset;
122 >  $location = $dataset if $isLocation;
123 >  if (!$isLocation && !$isRunList)
124      {
125 <      print "This dataset is not marked as present on the Tier 3!\n";
126 <      exit;
125 >      ($location, $nFiles, $status, $crossSection) = getLocation ($dataset);
126 >      if ($status ne "present")
127 >        {
128 >          print "This dataset is not marked as present on the Tier 3!\n";
129 >          print "Continue anyway? (y/N): ";
130 >          my $response = <STDIN>;
131 >          $response =~ s/\n//g;
132 >          exit if !$response || lc ($response) ne "y";
133 >        }
134 >      if (!(-e $location))
135 >        {
136 >          print "The database does not know where this dataset is!\n";
137 >          exit;
138 >        }
139      }
140 <  if (!(-e $location))
140 >  my @files;
141 >  if (!$isRunList)
142      {
143 <      print "The database does not know where this dataset is!\n";
144 <      exit;
143 >      opendir (LOCATION, $location);
144 >      @files = readdir (LOCATION);
145 >      closedir (LOCATION);
146 >    }
147 >  else
148 >    {
149 >      open (RUNLIST, $dataset);
150 >      while (my $file = <RUNLIST>)
151 >        {
152 >          push (@files, abs_path ($file));
153 >        }
154 >      close (RUNLIST);
155      }
156 <  opendir (LOCATION, $location);
89 <  my @files = readdir (LOCATION);
90 <  closedir (LOCATION);
156 >  $nFiles = @files - 2 if $isLocation || $isRunList;
157    if (@files - 2 != $nFiles)
158      {
159        print "Number of files does not match database entry!\n";
160 <      exit;
160 >      print "Continue anyway? (y/N): ";
161 >      my $response = <STDIN>;
162 >      $response =~ s/\n//g;
163 >      exit if !$response || lc ($response) ne "y";
164      }
165    open (RUNLIST, ">$workingDir/runList.py");
166    print RUNLIST "runList = [\n";
# Line 99 | Line 168 | outputRunList
168      {
169        next if $files[$i] eq ".";
170        next if $files[$i] eq "..";
171 <      print RUNLIST "'file:$location/$files[$i]'";
171 >      print RUNLIST "'file:" . abs_path ("$location/$files[$i]") . "'";
172        print RUNLIST "," if $i + 1 != @files;
173        print RUNLIST "\n";
174      }
175    print RUNLIST "]";
176    close (RUNLIST);
177 +  if ($crossSection && $crossSection >= 0.0)
178 +    {
179 +      open (CROSS_SECTION, ">$workingDir/crossSectionInPicobarn.txt");
180 +      print CROSS_SECTION "$crossSection\n";
181 +      close (CROSS_SECTION);
182 +    }
183  
184    return $nFiles;
185   }
# Line 118 | Line 193 | getLocation
193    my $queryDataset = $dataset;
194    $queryDataset =~ s/\*/%/g;
195    $queryDataset =~ s/(.*)/%$1%/g;
196 <  my $query = "select dataset,user,creationTime,location,nFiles,status from ntuple where dataset like '$queryDataset' and status='present' order by creationTime";
196 >  my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn,higherOrderCrossSectionInPicobarn from ntuple where dataset like '$queryDataset' order by creationTime";
197    $db->selectdb ("ntuple");
198    $results = $db->query ($query);
199    if ($results->numrows () == 1)
200      {
201        my @row = $results->fetchrow ();
202 <      return ($row[3], $row[4], $row[5]);
202 >      return ($row[3], $row[4], $row[5], $row[7]) if $row[7];
203 >      return ($row[3], $row[4], $row[5], $row[6]) if !$row[7];
204      }
205    if ($results->numrows () == 0)
206      {
# Line 137 | Line 213 | getLocation
213    for (my $i = 1; $i <= $results->numrows (); $i++)
214      {
215        my @row = $results->fetchrow ();
216 <      $map{"$i"} = [$row[3], $row[4], $row[5]];
216 >      $row[2] =~ s/([^ ]*) [^ ]*/$1/g;
217 >      $map{"$i"} = [$row[3], $row[4], $row[5], $row[7]] if $row[7];
218 >      $map{"$i"} = [$row[3], $row[4], $row[5], $row[6]] if !$row[7];
219        printf "(%2d) $row[0]\n", $i;
220 <      print "     created by $row[1] on $row[2]\n";
220 >      print "     (";
221 >      print color "green" if $row[5] eq "present";
222 >      print color "bold yellow" if $row[5] eq "submitted";
223 >      print color "bold red" if $row[5] eq "created" or $row[5] eq "cancelled" or $row[5] eq "deprecated";
224 >      print $row[5];
225 >      print color "reset";
226 >      print ") created by $row[1] on $row[2]\n";
227      }
228    print "\nWhich dataset would you like to use?: ";
229    my $response = <STDIN>;
# Line 150 | Line 234 | getLocation
234        exit;
235      }
236  
237 <  return ($map{$response}[0], $map{$response}[1], $map{$response}[2]);
237 >  return ($map{$response}[0], $map{$response}[1], $map{$response}[2], $map{$response}[3]);
238   }
239  
240   sub
241   outputCondor
242   {
243 +  my $condorFileName = shift;
244    my $workingDir = shift;
245    my $nJobs = shift;
246 +  my $dataset = shift;
247 +  my $label = shift;
248  
249    my $cmsRun = `which cmsRun`;
250 <  open (SUB, ">$workingDir/condor.sub");
250 >  my $condorFile = "";
251  
252 <  print SUB "Executable              = $cmsRun\n";
253 <  print SUB "Universe                = vanilla\n";
254 <  print SUB "Getenv                  = True\n";
255 <  print SUB "Arguments               = config_cfg.py $nJobs \$(Process)\n";
256 <  print SUB "\n";
257 <  print SUB "Output                  = condor_\$(Process).out\n";
258 <  print SUB "Error                   = condor_\$(Process).err\n";
259 <  print SUB "Log                     = condor_\$(Process).log\n";
260 <  print SUB "\n";
261 <  print SUB "Queue $nJobs\n";
252 >  if (!(-e $condorFileName))
253 >    {
254 >      my $arguments = "Arguments               = config_cfg.py True $nJobs \$(Process)";
255 >      $arguments .= " $dataset" if $dataset;
256 >      $arguments .= " NULL" if !$dataset;
257 >      $arguments .= " $label" if $label;
258 >      $arguments .= " NULL" if !$label;
259 >      $arguments .= "\n";
260 >
261 >      $condorFile .= "Executable              = $cmsRun\n";
262 >      $condorFile .= "Universe                = vanilla\n";
263 >      $condorFile .= "Getenv                  = True\n";
264 >      $condorFile .= $arguments;
265 >      $condorFile .= "\n";
266 >      $condorFile .= "Output                  = condor_\$(Process).out\n";
267 >      $condorFile .= "Error                   = condor_\$(Process).err\n";
268 >      $condorFile .= "Log                     = condor_\$(Process).log\n";
269 >      $condorFile .= "\n";
270 >      $condorFile .= "+IsLocalJob             = true\n";
271 >      $condorFile .= "Rank                    = TARGET.IsLocalSlot\n";
272 >      $condorFile .= "\n";
273 >      $condorFile .= "Queue $nJobs\n";
274 >    }
275 >  else
276 >    {
277 >      open (SUB, "<$condorFileName");
278 >      my @condorFile = <SUB>;
279 >      close (SUB);
280 >      $condorFile = join ("", @condorFile);
281 >      $condorFile =~ s/\$cmsRun/$cmsRun/g;
282 >      $condorFile =~ s/\$nJobs/$nJobs/g;
283 >      $condorFile =~ s/\$dataset/$dataset/g if $dataset;
284 >      $condorFile =~ s/\$dataset/NULL/g if !$dataset;
285 >      $condorFile =~ s/\$label/$label/g if $label;
286 >      $condorFile =~ s/\$label/NULL/g if !$label;
287 >    }
288  
289 +  open (SUB, ">$workingDir/condor.sub");
290 +  print SUB $condorFile;
291    close (SUB);
292   }
293  
# Line 182 | Line 297 | printHelp
297    my $exeName = $0;
298    $exeName =~ s/^.*\/([^\/]*)$/$1/;
299  
300 <  print "Usage: $exeName [OPTION]... DATASET CONFIG DIRECTORY NJOBS\n";
300 >  print "Usage: $exeName [OPTION]... [DATASET | LOCATION | LIST] CONFIG DIRECTORY NJOBS\n";
301    print "Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor.\n";
302    print "\n";
303 +  printf "%-29s%s\n", "  -d, --dataset DATASET", "override the dataset name";
304    printf "%-29s%s\n", "  -h, --help", "print this help message";
305 +  printf "%-29s%s\n", "  -l, --label LABEL", "give the dataset a short label";
306 +  printf "%-29s%s\n", "  -m, --maxEvents N", "only run over N events in the dataset; default is";
307 +  printf "%-29s%s\n", "  -p, --process PROCESS", "suffix for the process name";
308 +  printf "%-29s%s\n", " ", "to run over all events";
309    print "\n";
310 <  print "The DATASET must exist in the Tier 3 ntuple database, and CONFIG must be a valid\n";
311 <  print "CMSSW python configuration which can be used with cmsRun. DIRECTORY is a working\n";
312 <  print "directory that is created and in which all output, both from the CMSSW jobs and\n";
313 <  print "from Condor, is placed. Finally, NJOBS is the number of Condor jobs that will\n";
314 <  print "be created.\n";
310 >  print "The optional first argument must be either a DATASET registered in the Tier 3\n";
311 >  print "ntuple database, a LOCATION which exists on disk, or a text file containing a\n";
312 >  print "LIST of files to run over. CONFIG must be a valid CMSSW python configuration\n";
313 >  print "which can be used with cmsRun. DIRECTORY is a working directory that is created\n";
314 >  print "  and in which all output, both from the CMSSW jobs and from Condor, is placed.\n";
315 >  print "Finally, NJOBS is the number of Condor jobs that will be created.\n";
316  
317    exit;
318   }

Diff Legend

Removed lines
+ Added lines
< Changed lines (text as in the older revision, 1.5)
> Changed lines (text as in the newer revision, 1.20)