ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osusub
Revision: 1.12
Committed: Mon Feb 4 04:14:43 2013 UTC (12 years, 3 months ago) by ahart
Branch: MAIN
Changes since 1.11: +48 -24 lines
Log Message:
Allow the dataset name to be omitted as in the case of MC generation.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Mysql;
5 use File::Copy;
6 use Getopt::Long;
7 use POSIX;
8
9 sub printHelp;
10 sub outputPset;
11 sub outputRunList;
12 sub getLocation;
13 sub outputCondor;
14
# Main program: parse the command line, create the working directory, write
# the run list, the CMSSW configuration wrapper and the Condor submit file
# into it, copy the user's configuration alongside them, and finally submit
# the jobs with condor_submit.

my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "help|h");
my $argc = @ARGV;

printHelp () if $opt{"help"};
printHelp () if $argc != 3 && $argc != 4;

# The connection is opened only after the help/usage checks so that printing
# the help message does not require the database to be reachable.  The
# handle is a package variable because getLocation uses it directly.
our $db = Mysql->connect ("cmshead", "ntuple", "osuT3User");

# The dataset is optional (e.g., for MC generation), so it is only present
# when four arguments are given.
my $dataset = "";
$dataset = shift @ARGV if $argc == 4;
my ($config, $directory, $nJobs) = @ARGV;

# NJOBS is used as a divisor below and is written verbatim after "Queue" in
# the submit file, so it must be a positive integer.
if ($nJobs !~ /^[0-9]+$/ || $nJobs < 1)
{
  print "NJOBS must be a positive integer, not \"$nJobs\"!\n";
  exit 1;
}
if (!(-e $config))
{
  print "\"$config\" does not exist!\n";
  exit 1;
}
if (-e $directory)
{
  print "Directory \"$directory\" already exists!\n";
  print "Please delete it or specify another working directory.\n";
  exit 1;
}
mkdir $directory
  or die "Could not create directory \"$directory\": $!\n";

my $nFiles = outputRunList ($dataset, $directory);

# Spread the files evenly: first fix the number of files per job implied by
# the requested number of jobs, then count the jobs actually needed for that
# many files per job.  Without input files the value stays 0.
my $realNJobs = 0;
$realNJobs = ceil ($nFiles / ceil ($nFiles / $nJobs)) if $nFiles > 0;

outputPset ($directory, $dataset);
outputCondor ($directory, $realNJobs, $nJobs, $dataset);
copy ($config, "$directory/userConfig_cfg.py")
  or die "Could not copy \"$config\" to \"$directory/userConfig_cfg.py\": $!\n";

# condor_submit must run inside the working directory; never submit from
# anywhere else if the chdir fails.
chdir $directory
  or die "Could not change to directory \"$directory\": $!\n";
print "Submitting $realNJobs jobs to run on $nFiles files.\n";
system ("condor_submit condor.sub") == 0
  or die "condor_submit failed!\n";
63
# Writes the CMSSW configuration wrapper "config_cfg.py" into the working
# directory.
#
# The generated Python imports the user's configuration (userConfig_cfg) and
# inserts the job number taken from osusub_cfg before the extension of the
# TFileService output file name.  When a dataset is given it also points the
# source at osusub.runList and sets maxEvents.input to -1.
#
# Arguments:
#   $workingDir - directory in which config_cfg.py is created
#   $dataset    - dataset name or location; a false value skips the
#                 input-file settings
#
# Dies if the file cannot be opened or closed.
sub
outputPset
{
  my $workingDir = shift;
  my $dataset = shift;

  my $psetName = "$workingDir/config_cfg.py";
  open (my $pset, ">", $psetName)
    or die "Could not open \"$psetName\" for writing: $!\n";

  print $pset "import FWCore.ParameterSet.Config as cms\n";
  print $pset "import OSUT3Analysis.DBTools.osusub_cfg as osusub\n";
  print $pset "import re\n";
  print $pset "import userConfig_cfg as pset\n";
  print $pset "\n";
  print $pset "fileName = pset.process.TFileService.fileName\n";
  print $pset "fileName = fileName.pythonValue ()\n";
  # pythonValue () returns the name wrapped in quotes; strip the first and
  # last character to get the bare file name.
  print $pset "fileName = fileName[1:(len (fileName) - 1)]\n";
  # Turn "name.ext" into "name_<jobNumber>.ext".
  print $pset "fileName = re.sub (r'^(.*)\\.([^\\.]*)\$', r'\\1_' + str (osusub.jobNumber) + r'.\\2', fileName)\n";
  print $pset "pset.process.TFileService.fileName = fileName\n";
  print $pset "\n";
  if ($dataset)
  {
    print $pset "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n";
    print $pset "pset.process.maxEvents.input = cms.untracked.int32 (-1)\n";
  }
  print $pset "process = pset.process\n";

  # Buffered write errors only surface when the handle is closed.
  close ($pset)
    or die "Could not write \"$psetName\": $!\n";
}
92
# Writes "runList.py", a Python list of the input files, into the working
# directory and returns the number of files listed.
#
# The first argument is either a path that exists on disk, in which case it
# is used directly as the location, or a dataset name that is resolved with
# getLocation.  For a dataset resolved through getLocation, the user is asked
# for confirmation if the dataset is not marked "present" or if the number of
# files on disk differs from the database entry, and a non-negative cross
# section is additionally written to "crossSectionInPicobarn.txt".
#
# Arguments:
#   $dataset    - dataset name or directory; a false value means no input
#   $workingDir - directory in which the output files are created
#
# Returns 0 without writing anything when no dataset is given, otherwise the
# number of files written to the run list.  Dies if the location cannot be
# read or an output file cannot be written; exits if the user declines to
# continue or the location is unknown.
sub
outputRunList
{
  my $dataset = shift;
  my $workingDir = shift;

  return 0 if !$dataset;
  my $location;
  my $nFiles;
  my $status;
  my $crossSection;
  my $isLocation = (-e $dataset) ? 1 : 0;
  if (!$isLocation)
  {
    ($location, $nFiles, $status, $crossSection) = getLocation ($dataset);
    if (!defined $status || $status ne "present")
    {
      print "This dataset is not marked as present on the Tier 3!\n";
      print "Continue anyway? (y/N): ";
      my $response = <STDIN>;
      $response = "" if !defined $response;
      $response =~ s/\n//g;
      exit if !$response || lc ($response) ne "y";
    }
    if (!defined $location || !(-e $location))
    {
      print "The database does not know where this dataset is!\n";
      exit 1;
    }
  }
  else
  {
    $location = $dataset;
  }

  # Fail loudly if the location is not a readable directory instead of
  # silently producing an empty run list and a bogus file count.
  opendir (my $dir, $location)
    or die "Could not read directory \"$location\": $!\n";
  # Drop the "." and ".." entries explicitly rather than assuming that
  # exactly two of the entries are not files, and sort so that the run list
  # does not depend on the order in which readdir returns the entries.
  my @files = sort grep { $_ ne "." && $_ ne ".." } readdir ($dir);
  closedir ($dir);

  if (!$isLocation && @files != $nFiles)
  {
    print "Number of files does not match database entry!\n";
    print "Continue anyway? (y/N): ";
    my $response = <STDIN>;
    $response = "" if !defined $response;
    $response =~ s/\n//g;
    exit if !$response || lc ($response) ne "y";
  }
  # Always report the number of files actually written to the run list so
  # that the caller's job count agrees with it.
  $nFiles = @files;

  my $runListName = "$workingDir/runList.py";
  open (my $runList, ">", $runListName)
    or die "Could not open \"$runListName\" for writing: $!\n";
  print {$runList} "runList = [\n";
  print {$runList} join (",\n", map { "'file:$location/$_'" } @files), "\n" if @files;
  print {$runList} "]";
  close ($runList)
    or die "Could not write \"$runListName\": $!\n";

  if ($crossSection && $crossSection >= 0.0)
  {
    my $crossSectionName = "$workingDir/crossSectionInPicobarn.txt";
    open (my $crossSectionFile, ">", $crossSectionName)
      or die "Could not open \"$crossSectionName\" for writing: $!\n";
    print {$crossSectionFile} "$crossSection\n";
    close ($crossSectionFile)
      or die "Could not write \"$crossSectionName\": $!\n";
  }

  return $nFiles;
}
163
# Looks up a dataset in the "ntuple" table of the "ntuple" database using the
# package-level handle $db.
#
# The name is matched as a substring with SQL "like"; a "*" in the name is
# translated to the SQL wildcard "%".  If several datasets match, they are
# listed in order of creation time and the user is asked to pick one.
#
# Arguments:
#   $dataset - dataset name, possibly containing "*" wildcards
#
# Returns the list (location, nFiles, status, crossSectionInPicobarn) of the
# selected dataset.  Exits if nothing matches, if the query fails or if the
# user's selection is invalid; dies if there is no database connection.
sub
getLocation
{
  my $dataset = shift;

  die "No connection to the ntuple database!\n" if !$db;

  my $queryDataset = $dataset;
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";
  # The dataset name comes straight from the command line, so let the
  # database handle quote it instead of pasting it between quotes by hand.
  my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn from ntuple where dataset like " . $db->quote ($queryDataset) . " order by creationTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);
  if (!$results)
  {
    print "Could not query the ntuple database!\n";
    exit 1;
  }

  my $nRows = $results->numrows ();
  if ($nRows == 1)
  {
    my @row = $results->fetchrow ();
    return ($row[3], $row[4], $row[5], $row[6]);
  }
  if ($nRows == 0)
  {
    print "Dataset does not exist on the Tier 3!\n";
    exit 1;
  }

  # Several matches: remember the relevant columns of each row under the
  # number shown to the user.
  my %map;
  print "Found multiple datasets matching\n";
  print "\"$dataset\":\n";
  for my $i (1 .. $nRows)
  {
    my @row = $results->fetchrow ();
    $map{"$i"} = [$row[3], $row[4], $row[5], $row[6]];
    # The dataset name is passed as an argument so that a "%" in it is not
    # interpreted as a format directive.
    printf "(%2d) %s\n", $i, $row[0];
    print " created by $row[1] on $row[2]\n";
  }
  print "\nWhich dataset would you like to use?: ";
  my $response = <STDIN>;
  $response = "" if !defined $response;
  $response =~ s/[ \t\n]//g;
  if (!(exists $map{$response}))
  {
    print "Your selection \"$response\" was not a valid option! Quitting.\n";
    exit 1;
  }

  return @{$map{$response}};
}
207
# Writes the Condor submit description file "condor.sub" into the working
# directory.
#
# Each job runs cmsRun (located with "which") on config_cfg.py with the
# arguments "True", the real number of jobs, the Condor process number and
# the dataset name, or "NULL" when no dataset is given.
#
# Arguments:
#   $workingDir - directory in which condor.sub is created
#   $realNJobs  - number of jobs passed to each job on its command line
#   $nJobs      - number of jobs that are queued
#   $dataset    - dataset name or location; a false value is written as NULL
#
# Dies if the file cannot be opened or closed.
sub
outputCondor
{
  my $workingDir = shift;
  my $realNJobs = shift;
  my $nJobs = shift;
  my $dataset = shift;

  # The output of "which" ends in a newline, which must not end up inside
  # the Executable line.
  my $cmsRun = `which cmsRun`;
  chomp $cmsRun;
  warn "cmsRun was not found in the PATH; the Executable entry will be empty.\n" if !$cmsRun;

  my $datasetArgument = $dataset ? $dataset : "NULL";

  my $subName = "$workingDir/condor.sub";
  open (my $sub, ">", $subName)
    or die "Could not open \"$subName\" for writing: $!\n";

  print $sub "Executable = $cmsRun\n";
  print $sub "Universe = vanilla\n";
  print $sub "Getenv = True\n";
  print $sub "Arguments = config_cfg.py True $realNJobs \$(Process) $datasetArgument\n";
  print $sub "\n";
  print $sub "Output = condor_\$(Process).out\n";
  print $sub "Error = condor_\$(Process).err\n";
  print $sub "Log = condor_\$(Process).log\n";
  print $sub "\n";
  print $sub "+IsLocalJob = true\n";
  print $sub "Rank = TARGET.IsLocalSlot\n";
  print $sub "\n";
  # NOTE(review): the requested number of jobs is queued, while the jobs are
  # told about $realNJobs, which may be smaller -- confirm that the surplus
  # jobs are intended.
  print $sub "Queue $nJobs\n";

  close ($sub)
    or die "Could not write \"$subName\": $!\n";
}
236
# Prints the usage message to standard output and terminates the program.
# The program name shown is $0 with any leading directories removed.
sub
printHelp
{
  my $exeName = $0;
  $exeName =~ s/^.*\/([^\/]*)$/$1/;

  # Explanation of the positional arguments, printed after the option list.
  my @description = (
    "The optional first argument must be either a DATASET registered in the Tier 3\n",
    "ntuple database or a LOCATION which exists on disk. CONFIG must be a valid\n",
    "CMSSW python configuration which can be used with cmsRun. DIRECTORY is a\n",
    "working directory that is created and in which all output, both from the CMSSW\n",
    "jobs and from Condor, is placed. Finally, NJOBS is the number of Condor jobs\n",
    "that will be created.\n",
  );

  print "Usage: $exeName [OPTION]... [DATASET | LOCATION] CONFIG DIRECTORY NJOBS\n";
  print "Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor.\n";
  print "\n";
  printf "%-29s%s\n", " -h, --help", "print this help message";
  print "\n";
  foreach my $line (@description)
  {
    print $line;
  }

  exit;
}
256 }