ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osusub
(Generate patch)

Comparing UserCode/OSUT3Analysis/DBTools/scripts/osusub (file contents):
Revision 1.7 by ahart, Tue Oct 9 10:20:47 2012 UTC vs.
Revision 1.13 by ahart, Fri Feb 8 09:17:45 2013 UTC

# Line 16 | Line 16 | our $db = Mysql->connect ("cmshead", "nt
16  
17   my %opt;
18   Getopt::Long::Configure ("bundling");
19 < GetOptions (\%opt, "help|h");
19 > GetOptions (\%opt, "maxEvents|m=s", "help|h");
20   my $argc = @ARGV;
21  
22   printHelp () if $opt{"help"};
23 < printHelp () if $argc != 4;
24 < if (!(-e $ARGV[1]))
23 > printHelp () if $argc != 3 && $argc != 4;
24 > my $dataset;
25 > my $config;
26 > my $directory;
27 > my $nJobs;
28 > if ($argc == 3)
29    {
30 <    print "\"$ARGV[1]\" does not exist!\n";
30 >    $dataset = "";
31 >    $config = $ARGV[0];
32 >    $directory = $ARGV[1];
33 >    $nJobs = $ARGV[2];
34 >  }
35 > if ($argc == 4)
36 >  {
37 >    $dataset = $ARGV[0];
38 >    $config = $ARGV[1];
39 >    $directory = $ARGV[2];
40 >    $nJobs = $ARGV[3];
41 >  }
42 > if (!(-e $config))
43 >  {
44 >    print "\"$config\" does not exist!\n";
45      exit;
46    }
47 < if (-e $ARGV[2])
47 > if (-e $directory)
48    {
49 <    print "Directory \"$ARGV[2]\" already exists!\n";
49 >    print "Directory \"$directory\" already exists!\n";
50      print "Please delete it or specify another working directory.\n";
51      exit;
52    }
53 < mkdir $ARGV[2];
54 < my $nFiles = outputRunList ($ARGV[0], $ARGV[2]);
55 < my $nJobs = $ARGV[3];
56 < my $realNJobs = ceil ($nFiles / ceil ($nFiles / $nJobs));
57 < outputPset ($ARGV[2]);
58 < outputCondor ($ARGV[2], $realNJobs);
59 < copy ($ARGV[1], "$ARGV[2]/userConfig_cfg.py");
60 < chdir $ARGV[2];
61 < print "Submitting $realNJobs jobs to run on $nFiles files.\n";
53 > mkdir $directory;
54 > my $nFiles = outputRunList ($dataset, $directory);
55 > my $realNJobs = $nJobs;
56 > $realNJobs = ceil ($nFiles / ceil ($nFiles / $realNJobs)) if $nFiles;
57 > my $eventsPerJob = -1;
58 > $eventsPerJob = ceil ($opt{"maxEvents"} / $realNJobs) if $opt{"maxEvents"} && $opt{"maxEvents"} >= 0;
59 > my $realMaxEvents = $eventsPerJob * $realNJobs;
60 > outputPset ($directory, $dataset, $opt{"maxEvents"}, $eventsPerJob);
61 > outputCondor ($directory, $realNJobs, $dataset);
62 > copy ($config, "$directory/userConfig_cfg.py");
63 > chdir $directory;
64 > print "Submitting $realNJobs jobs to run on $realMaxEvents events in $nFiles files.\n" if $realMaxEvents >= 0;
65 > print "Submitting $realNJobs jobs to run on all events in $nFiles files.\n" if $realMaxEvents < 0;
66   system ("condor_submit condor.sub");
67  
68   sub
69   outputPset
70   {
71    my $workingDir = shift;
72 +  my $dataset = shift;
73 +  my $nEvents = shift;
74 +  my $eventsPerJob = shift;
75  
76    open (PSET, ">$workingDir/config_cfg.py");
77  
# Line 61 | Line 86 | outputPset
86    print PSET "fileName = re.sub (r'^(.*)\\.([^\\.]*)\$', r'\\1_' + str (osusub.jobNumber) + r'.\\2', fileName)\n";
87    print PSET "pset.process.TFileService.fileName = fileName\n";
88    print PSET "\n";
89 <  print PSET "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n";
90 <  print PSET "pset.process.maxEvents.input = cms.untracked.int32 (-1)\n";
89 >  if ($dataset)
90 >    {
91 >      print PSET "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n";
92 >      print PSET "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)\n" if $eventsPerJob < 0;
93 >    }
94 >  print PSET "pset.process.maxEvents.input = cms.untracked.int32 ($eventsPerJob)\n" if $eventsPerJob >= 0;
95    print PSET "process = pset.process\n";
96  
97    close (PSET);
# Line 74 | Line 103 | outputRunList
103    my $dataset = shift;
104    my $workingDir = shift;
105  
106 <  my ($location, $nFiles, $status, $crossSection) = getLocation ($dataset);
107 <  if ($status ne "present")
108 <    {
109 <      print "This dataset is not marked as present on the Tier 3!\n";
110 <      exit;
106 >  return 0 if !$dataset;
107 >  my $location;
108 >  my $nFiles;
109 >  my $status;
110 >  my $crossSection;
111 >  my $isLocation = 0;
112 >  $isLocation = 1 if -e $dataset;
113 >  if (!$isLocation)
114 >    {
115 >      ($location, $nFiles, $status, $crossSection) = getLocation ($dataset);
116 >      if ($status ne "present")
117 >        {
118 >          print "This dataset is not marked as present on the Tier 3!\n";
119 >          print "Continue anyway? (y/N): ";
120 >          my $response = <STDIN>;
121 >          $response =~ s/\n//g;
122 >          exit if !$response || lc ($response) ne "y";
123 >        }
124 >      if (!(-e $location))
125 >        {
126 >          print "The database does not know where this dataset is!\n";
127 >          exit;
128 >        }
129      }
130 <  if (!(-e $location))
130 >  else
131      {
132 <      print "The database does not know where this dataset is!\n";
86 <      exit;
132 >      $location = $dataset;
133      }
134    opendir (LOCATION, $location);
135    my @files = readdir (LOCATION);
136    closedir (LOCATION);
137 <  if (@files - 2 != $nFiles)
137 >  if (!$isLocation && @files - 2 != $nFiles)
138      {
139        print "Number of files does not match database entry!\n";
140 <      exit;
140 >      print "Continue anyway? (y/N): ";
141 >      my $response = <STDIN>;
142 >      $response =~ s/\n//g;
143 >      exit if !$response || lc ($response) ne "y";
144 >    }
145 >  elsif ($isLocation)
146 >    {
147 >      $nFiles = @files - 2;
148      }
149    open (RUNLIST, ">$workingDir/runList.py");
150    print RUNLIST "runList = [\n";
# Line 124 | Line 177 | getLocation
177    my $queryDataset = $dataset;
178    $queryDataset =~ s/\*/%/g;
179    $queryDataset =~ s/(.*)/%$1%/g;
180 <  my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn from ntuple where dataset like '$queryDataset' and status='present' order by creationTime";
180 >  my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn from ntuple where dataset like '$queryDataset' order by creationTime";
181    $db->selectdb ("ntuple");
182    $results = $db->query ($query);
183    if ($results->numrows () == 1)
# Line 164 | Line 217 | outputCondor
217   {
218    my $workingDir = shift;
219    my $nJobs = shift;
220 +  my $dataset = shift;
221  
222    my $cmsRun = `which cmsRun`;
223    open (SUB, ">$workingDir/condor.sub");
# Line 171 | Line 225 | outputCondor
225    print SUB "Executable              = $cmsRun\n";
226    print SUB "Universe                = vanilla\n";
227    print SUB "Getenv                  = True\n";
228 <  print SUB "Arguments               = config_cfg.py $nJobs \$(Process)\n";
228 >  print SUB "Arguments               = config_cfg.py True $nJobs \$(Process) $dataset\n" if $dataset;
229 >  print SUB "Arguments               = config_cfg.py True $nJobs \$(Process) NULL\n" if !$dataset;
230    print SUB "\n";
231    print SUB "Output                  = condor_\$(Process).out\n";
232    print SUB "Error                   = condor_\$(Process).err\n";
# Line 191 | Line 246 | printHelp
246    my $exeName = $0;
247    $exeName =~ s/^.*\/([^\/]*)$/$1/;
248  
249 <  print "Usage: $exeName [OPTION]... DATASET CONFIG DIRECTORY NJOBS\n";
249 >  print "Usage: $exeName [OPTION]... [DATASET | LOCATION] CONFIG DIRECTORY NJOBS\n";
250    print "Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor.\n";
251    print "\n";
252    printf "%-29s%s\n", "  -h, --help", "print this help message";
253 +  printf "%-29s%s\n", "  -m, --maxEvents N", "only run over N events in the dataset; default is";
254 +  printf "%-29s%s\n", " ", "to run over all events";
255    print "\n";
256 <  print "The DATASET must exist in the Tier 3 ntuple database, and CONFIG must be a valid\n";
257 <  print "CMSSW python configuration which can be used with cmsRun. DIRECTORY is a working\n";
258 <  print "directory that is created and in which all output, both from the CMSSW jobs and\n";
259 <  print "from Condor, is placed. Finally, NJOBS is the number of Condor jobs that will\n";
260 <  print "be created.\n";
256 >  print "The optional first argument must be either a DATASET registered in the Tier 3\n";
257 >  print "ntuple database or a LOCATION which exists on disk. CONFIG must be a valid\n";
258 >  print "CMSSW python configuration which can be used with cmsRun. DIRECTORY is a\n";
259 >  print "working directory that is created and in which all output, both from the CMSSW\n";
260 >  print "jobs and from Condor, is placed. Finally, NJOBS is the number of Condor jobs\n";
261 >  print "that will be created.\n";
262  
263    exit;
264   }

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines