ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osusub
Revision: 1.9
Committed: Wed Dec 12 07:42:10 2012 UTC (12 years, 5 months ago) by ahart
Branch: MAIN
Changes since 1.8: +9 -3 lines
Log Message:
Allow running over incomplete datasets.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Mysql;
5 use File::Copy;
6 use Getopt::Long;
7 use POSIX;
8
sub printHelp;
sub outputPset;
sub outputRunList;
sub getLocation;
sub outputCondor;

# Database handle shared with getLocation, which uses it to look up the
# dataset.  The connection is only verified further down, after the options
# have been parsed, so that --help keeps working when the server is down.
our $db = Mysql->connect ("cmshead", "ntuple", "osuT3User");

my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "help|h");
my $argc = @ARGV;

printHelp () if $opt{"help"};
printHelp () if $argc != 4;

# Positional arguments: DATASET CONFIG DIRECTORY NJOBS (see printHelp).
my ($dataset, $userConfig, $workingDir, $nJobs) = @ARGV;

if (!(-e $userConfig))
  {
    print "\"$userConfig\" does not exist!\n";
    exit;
  }
if (-e $workingDir)
  {
    print "Directory \"$workingDir\" already exists!\n";
    print "Please delete it or specify another working directory.\n";
    exit;
  }
# NJOBS is used as a divisor below, so it must be a positive integer.
if ($nJobs !~ /^\d+$/ || $nJobs < 1)
  {
    print "NJOBS must be a positive integer, not \"$nJobs\"!\n";
    exit;
  }
die "Could not connect to the ntuple database!\n" if !$db;

mkdir ($workingDir)
  or die "Could not create directory \"$workingDir\": $!\n";
my $nFiles = outputRunList ($dataset, $workingDir);
# Without this guard a dataset with no files triggers a division by zero.
if (!$nFiles || $nFiles < 1)
  {
    print "There are no files to run over!\n";
    exit;
  }

# Each job gets at most ceil (nFiles / nJobs) files; the number of jobs that
# is actually needed to cover all files may be smaller than requested.
my $filesPerJob = ceil ($nFiles / $nJobs);
my $realNJobs = ceil ($nFiles / $filesPerJob);

outputPset ($workingDir);
outputCondor ($workingDir, $realNJobs, $dataset);
copy ($userConfig, "$workingDir/userConfig_cfg.py")
  or die "Could not copy \"$userConfig\" to \"$workingDir\": $!\n";
chdir ($workingDir)
  or die "Could not change to directory \"$workingDir\": $!\n";
print "Submitting $realNJobs jobs to run on $nFiles files.\n";
system ("condor_submit condor.sub") == 0
  or die "condor_submit did not finish successfully!\n";
45
# Writes the wrapper configuration "config_cfg.py" into the working
# directory.  The generated Python imports the user's configuration
# (userConfig_cfg), appends the job number to the TFileService output file
# name, and replaces the input file list and event limit using values taken
# from the osusub_cfg module.
#
# Arguments:
#   $workingDir - directory in which config_cfg.py is created
#
# Dies if the file cannot be created or written completely.
sub
outputPset
{
  my $workingDir = shift;

  my $psetPath = "$workingDir/config_cfg.py";
  open (my $pset, ">", $psetPath)
    or die "Could not open \"$psetPath\" for writing: $!\n";

  print $pset "import FWCore.ParameterSet.Config as cms\n";
  print $pset "import OSUT3Analysis.DBTools.osusub_cfg as osusub\n";
  print $pset "import re\n";
  print $pset "import userConfig_cfg as pset\n";
  print $pset "\n";
  # pythonValue () returns the file name wrapped in quotes, which the slice
  # strips; the job number is then inserted before the file extension.
  print $pset "fileName = pset.process.TFileService.fileName\n";
  print $pset "fileName = fileName.pythonValue ()\n";
  print $pset "fileName = fileName[1:(len (fileName) - 1)]\n";
  print $pset "fileName = re.sub (r'^(.*)\\.([^\\.]*)\$', r'\\1_' + str (osusub.jobNumber) + r'.\\2', fileName)\n";
  print $pset "pset.process.TFileService.fileName = fileName\n";
  print $pset "\n";
  print $pset "pset.process.source.fileNames = cms.untracked.vstring (osusub.runList)\n";
  print $pset "pset.process.maxEvents.input = cms.untracked.int32 (-1)\n";
  print $pset "process = pset.process\n";

  # Buffered write errors only show up when the handle is closed.
  close ($pset)
    or die "Could not finish writing \"$psetPath\": $!\n";
}
70
# Asks the user whether to carry on after a warning has been printed.  The
# program exits unless the answer is "y" or "Y"; end-of-file on STDIN counts
# as "no".
sub
confirmOrExit
{
  print "Continue anyway? (y/N): ";
  my $response = <STDIN>;
  $response = "" if !defined $response;
  $response =~ s/\n//g;
  exit if lc ($response) ne "y";
}

# Looks up the dataset in the database and writes "runList.py" (a Python
# list named runList with one 'file:...' entry per file found in the
# dataset's directory) into the working directory.  If the database holds a
# non-negative, non-zero cross section for the dataset it is also written to
# "crossSectionInPicobarn.txt".
#
# The user is asked for confirmation when the dataset is not marked as
# "present" or when the number of files on disk differs from the database
# entry.
#
# Arguments:
#   $dataset    - dataset name, passed on to getLocation
#   $workingDir - existing directory in which the output files are created
#
# Returns the number of files written to the run list.  This is the number
# of files actually found on disk, which is what the caller needs in order
# to split the work into jobs; it equals the database entry unless the user
# chose to continue with an incomplete dataset.
sub
outputRunList
{
  my $dataset = shift;
  my $workingDir = shift;

  my ($location, $nFiles, $status, $crossSection) = getLocation ($dataset);
  if ($status ne "present")
    {
      print "This dataset is not marked as present on the Tier 3!\n";
      confirmOrExit ();
    }
  if (!(-e $location))
    {
      print "The database does not know where this dataset is!\n";
      exit;
    }

  opendir (my $dir, $location)
    or die "Could not read directory \"$location\": $!\n";
  # Everything except the "." and ".." entries is treated as a data file.
  my @dataFiles = grep { $_ ne "." && $_ ne ".." } readdir ($dir);
  closedir ($dir);

  if (!@dataFiles)
    {
      print "There are no files in \"$location\"!\n";
      exit;
    }
  if (@dataFiles != $nFiles)
    {
      print "Number of files does not match database entry!\n";
      confirmOrExit ();
    }

  my $runListPath = "$workingDir/runList.py";
  open (my $runList, ">", $runListPath)
    or die "Could not open \"$runListPath\" for writing: $!\n";
  print $runList "runList = [\n";
  for my $i (0 .. $#dataFiles)
    {
      print $runList "'file:$location/$dataFiles[$i]'";
      print $runList "," if $i != $#dataFiles;
      print $runList "\n";
    }
  print $runList "]";
  close ($runList)
    or die "Could not finish writing \"$runListPath\": $!\n";

  if ($crossSection && $crossSection >= 0.0)
    {
      my $crossSectionPath = "$workingDir/crossSectionInPicobarn.txt";
      open (my $crossSectionFile, ">", $crossSectionPath)
        or die "Could not open \"$crossSectionPath\" for writing: $!\n";
      print $crossSectionFile "$crossSection\n";
      close ($crossSectionFile)
        or die "Could not finish writing \"$crossSectionPath\": $!\n";
    }

  return scalar (@dataFiles);
}
123
# Finds a dataset in the "ntuple" table using the global database handle $db.
#
# The dataset name is matched as a substring; "*" in the name acts as a
# wildcard.  If exactly one dataset matches it is used directly.  If several
# match, they are listed in order of creation time and the user is asked to
# pick one by number.  The program exits if nothing matches or the selection
# is invalid, and dies if the query itself fails.
#
# Arguments:
#   $dataset - (partial) dataset name, "*" allowed as a wildcard
#
# Returns the list (location, nFiles, status, crossSectionInPicobarn) of the
# chosen database row.
sub
getLocation
{
  my $dataset = shift;

  # Turn the name into a LIKE pattern and let the driver quote it, so that
  # quotes or backslashes in the name cannot break the SQL statement.
  my $queryDataset = $dataset;
  $queryDataset =~ s/\*/%/g;
  my $quotedPattern = $db->quote ("%$queryDataset%");
  my $query = "select dataset,user,creationTime,location,nFiles,status,crossSectionInPicobarn from ntuple where dataset like $quotedPattern order by creationTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);
  die "Query of the ntuple database failed!\n" if !$results;

  my $nMatches = $results->numrows ();
  if ($nMatches == 1)
    {
      my @row = $results->fetchrow ();
      return ($row[3], $row[4], $row[5], $row[6]);
    }
  if ($nMatches == 0)
    {
      print "Dataset does not exist on the Tier 3!\n";
      exit;
    }

  # More than one match: show a numbered menu and remember the columns of
  # each row under its menu number.
  my %map;
  print "Found multiple datasets matching\n";
  print "\"$dataset\":\n";
  for (my $i = 1; $i <= $nMatches; $i++)
    {
      my @row = $results->fetchrow ();
      $map{"$i"} = [$row[3], $row[4], $row[5], $row[6]];
      # The name is passed as an argument so that a "%" in it is not taken
      # as a printf conversion.
      printf "(%2d) %s\n", $i, $row[0];
      print " created by $row[1] on $row[2]\n";
    }
  print "\nWhich dataset would you like to use?: ";
  my $response = <STDIN>;
  $response = "" if !defined $response;
  $response =~ s/[ \t\n]//g;
  if (!(exists $map{$response}))
    {
      print "Your selection \"$response\" was not a valid option! Quitting.\n";
      exit;
    }

  return @{$map{$response}};
}
167
# Writes the Condor submit description file "condor.sub" into the working
# directory.  Every job runs cmsRun (located through the PATH) on
# config_cfg.py with the arguments "<nJobs> <process number> <dataset>".
#
# Arguments:
#   $workingDir - directory in which condor.sub is created
#   $nJobs      - number of jobs to queue
#   $dataset    - dataset name handed to every job
#
# Dies if cmsRun cannot be found or the file cannot be written.
sub
outputCondor
{
  my $workingDir = shift;
  my $nJobs = shift;
  my $dataset = shift;

  # The output of "which" ends in a newline, which must not end up in the
  # Executable line.
  my $cmsRun = `which cmsRun`;
  $cmsRun = "" if !defined $cmsRun;
  chomp ($cmsRun);
  die "Could not find cmsRun in your PATH!\n" if !$cmsRun;

  my $subPath = "$workingDir/condor.sub";
  open (my $sub, ">", $subPath)
    or die "Could not open \"$subPath\" for writing: $!\n";

  print $sub "Executable = $cmsRun\n";
  print $sub "Universe = vanilla\n";
  print $sub "Getenv = True\n";
  print $sub "Arguments = config_cfg.py $nJobs \$(Process) $dataset\n";
  print $sub "\n";
  print $sub "Output = condor_\$(Process).out\n";
  print $sub "Error = condor_\$(Process).err\n";
  print $sub "Log = condor_\$(Process).log\n";
  print $sub "\n";
  print $sub "+IsLocalJob = true\n";
  print $sub "Rank = TARGET.IsLocalSlot\n";
  print $sub "\n";
  print $sub "Queue $nJobs\n";

  close ($sub)
    or die "Could not finish writing \"$subPath\": $!\n";
}
194
# Prints the usage message for this script to standard output and then
# terminates the program.  Takes no arguments and never returns.
sub
printHelp
{
  # Reduce the invocation path to the bare program name.
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  print "Usage: $exeName [OPTION]... DATASET CONFIG DIRECTORY NJOBS\n",
        "Submits CMSSW jobs to the OSU Tier 3 compute nodes using Condor.\n",
        "\n";
  printf "%-29s%s\n", " -h, --help", "print this help message";
  print "\n",
        "The DATASET must exist in the Tier 3 ntuple database, and CONFIG must be a valid\n",
        "CMSSW python configuration which can be used with cmsRun. DIRECTORY is a working\n",
        "directory that is created and in which all output, both from the CMSSW jobs and\n",
        "from Condor, is placed. Finally, NJOBS is the number of Condor jobs that will\n",
        "be created.\n";

  exit;
}