1 |
#!/usr/bin/env perl
|
2 |
|
3 |
use strict;
|
4 |
use Mysql;
|
5 |
use Getopt::Long;
|
6 |
use File::Copy;
|
7 |
|
8 |
# Forward declarations for subroutines defined further down in this file.
sub getDataset;
sub dbUpdate;
sub dbDelete;
sub addSlashes;
sub uploadRelease;

# Database handle shared by every subroutine in this file.
our $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";

my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "comment|c=s", "format|f=s", "pyConfig|p=s", "crabCfg|b=s", "jsonFile|j=s", "lumiSummary|s=s", "location|l=s", "fileList|t=s", "release|r=s", "help|h");
my $argc = @ARGV;

# Exactly two positional arguments are expected: COMMAND and NAME.
# printHelp never returns; it prints the requested page and exits.
my %isCommand = map { $_ => 1 } qw(create update finish deprecate uploadRelease deleteEntry);
printHelp ($ARGV[0]) if $opt{"help"};
printHelp () if $argc != 2;
printHelp () if !$isCommand{$ARGV[0]};

if (($ARGV[0] eq "create" || $ARGV[0] eq "finish") && !$opt{"location"})
{
  print "The directory containing the ntuples must be given!\n";
  exit;
}
# Both options are required, so complain as soon as either one is missing.
if ($ARGV[0] eq "uploadRelease" && (!$opt{"pyConfig"} || !$opt{"release"}))
{
  print "Both the Python and the CMSSW release must be given!\n";
  exit;
}
if ($ARGV[0] ne "uploadRelease")
{
  my $id = -1;
  my $fullDataset;
  ($id, $fullDataset) = getDataset ($ARGV[0], $ARGV[1]) if $ARGV[0] ne "create";

  # A negative id means there is no existing entry to act on, so the command
  # turns into "create" using the name exactly as typed.
  $fullDataset = $ARGV[1] if $id < 0;
  $ARGV[0] = "create" if $id < 0;

  if ($ARGV[0] eq "deleteEntry")
  {
    # The row is about to be removed, so there is no point updating it first.
    dbDelete ($id);
  }
  else
  {
    my $status = "present";
    $status = "" if $ARGV[0] eq "update";
    $status = "deprecated" if $ARGV[0] eq "deprecate";
    $opt{"format"} = "BEAN" if $ARGV[0] eq "create" && !$opt{"format"};

    # Make a relative location absolute, but only when one was actually given;
    # otherwise the stored location would be overwritten with "$PWD/".
    $opt{"location"} = "$ENV{'PWD'}/$opt{'location'}" if $opt{"location"} && $opt{"location"} !~ m/^\//;

    dbUpdate ($id, $fullDataset, $ENV{"USER"}, $opt{"format"}, $opt{"location"}, $opt{"fileList"}, $status, $opt{"comment"}, $opt{"pyConfig"}, $opt{"crabCfg"}, $opt{"jsonFile"}, $opt{"lumiSummary"}, $opt{"release"});
  }
}
else
{
  # Directory that holds the CMSSW_* directory: the release path with the
  # CMSSW_* component removed and repeated slashes collapsed.
  my $parentDir = "./$opt{'release'}";
  $parentDir =~ s/CMSSW_[^\/]*//g;
  $parentDir =~ s/\/\/*/\//g;

  # Bare "CMSSW_..." component of the release path.
  my $cmsswRelease = $opt{'release'};
  $cmsswRelease =~ s/^.*CMSSW_([^\/]*).*$/CMSSW_$1/;

  # List-form system: the arguments reach tar verbatim and are never
  # interpreted by a shell.
  # NOTE(review): the archive member is still the release path as given, which
  # tar resolves relative to $parentDir because of -C; this looks wrong when
  # the path has a directory component -- confirm before changing.
  my $tarball = "$ARGV[1].tar.gz";
  system ("tar", "-C", $parentDir, "-czf", $tarball, $opt{"release"}) == 0 or die "Failed to create $tarball, stopped";

  $opt{"format"} = "BEAN" if !$opt{"format"};
  uploadRelease ($opt{"format"}, $cmsswRelease, $tarball, $opt{"pyConfig"}, $ARGV[1], $ENV{"USER"}, $opt{"comment"});
}
|
60 |
|
61 |
# Escapes a string so that it can be embedded in the quoted SQL string
# literals built elsewhere in this file.
#
# A backslash is placed in front of every backslash, single quote and double
# quote, and every NUL byte is replaced by the two characters backslash-zero.
# (The NUL handling presumably mirrors PHP's addslashes; the previous pattern
# matched a backslash followed by "0" instead, which turned an escaped "\\0"
# into three backslashes and a zero and never touched real NUL bytes.)
#
# Parameters: the string to escape (may be undef).
# Returns:    the escaped copy; undef is passed through unchanged.
sub
addSlashes
{
  my $string = shift;

  return $string if !defined $string;

  # One pass over the text so that backslashes added here are never
  # escaped a second time.
  $string =~ s/([\\'"])/\\$1/g;

  # \0 in the pattern is the NUL byte; the replacement is backslash + "0".
  $string =~ s/\0/\\0/g;

  return $string;
}
|
73 |
|
74 |
# Finds the database entry that a dataset name refers to, asking the user to
# disambiguate when necessary.
#
# The name is matched as a substring of the "dataset" column, with "*" acting
# as a wildcard.
#
# Parameters:
#   $command - the command being run; "deleteEntry" and "deprecate" are never
#              offered the option of creating a new entry
#   $dataset - dataset name (or pattern) typed by the user
#
# Returns (id, dataset) for the chosen entry, or (-1, "") when the user opts
# to create a new entry.  Exits the script when nothing matches and nothing is
# to be created, or when the user's selection is invalid.
sub
getDataset
{
  my $command = shift;
  my $dataset = shift;

  my $canCreate = $command ne "deleteEntry" && $command ne "deprecate";

  # Escape first so that a quote in the name cannot break out of the SQL
  # literal, then turn the user's "*" wildcards into LIKE wildcards.
  my $queryDataset = addSlashes ($dataset);
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";

  my $query = "select id,dataset,user,creationTime from ntuple where dataset like '$queryDataset' order by lastUpdateTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query) or die "Failed to query the ntuple database, stopped";
  my $nRows = $results->numrows ();

  if ($nRows == 1)
  {
    my @row = $results->fetchrow ();
    return ($row[0], $row[1]);
  }

  if ($nRows == 0)
  {
    if (!$canCreate)
    {
      print "Database entry does not exist.\n";
      exit;
    }

    print "Database entry does not exist. Create it? (Y/n): ";
    my $response = <STDIN>;
    $response = "" if !defined $response;
    $response =~ s/\n//g;

    # Only an empty reply selects the default; a reply of "0" is not "yes".
    $response = "y" if $response eq "";
    exit if substr (lc ($response), 0, 1) ne 'y';
    return (-1, "");
  }

  # Several matches: list them and let the user pick one by number.
  my %id;
  my %fullDataset;
  print "Found multiple database entries matching\n";
  print "\"$dataset\":\n";
  print "( 0) new\n" if $canCreate;
  for (my $i = 1; $i <= $nRows; $i++)
  {
    my @row = $results->fetchrow ();
    $id{"$i"} = $row[0];
    $fullDataset{"$i"} = $row[1];
    printf "(%2d) $row[1]\n", $i;
    print " created by $row[2] on $row[3]\n";
  }
  print "\nWhich entry would you like to modify?";
  if ($canCreate)
  {
    print " (Select 0 to create a new entry): "
  }
  else
  {
    print ": ";
  }
  my $response = <STDIN>;
  $response = "" if !defined $response;
  $response =~ s/[ \t\n]//g;

  # Compare as text: a numeric comparison would treat an empty or non-numeric
  # reply as 0 and silently create a new entry.
  return (-1, "") if $canCreate && $response =~ m/^0+$/;
  if (!(exists $id{$response}))
  {
    print "Your selection \"$response\" was not a valid option! Quitting.\n";
    exit;
  }

  return ($id{$response}, $fullDataset{$response});
}
|
142 |
|
143 |
# Returns the entire contents of the named file as one string.  Prints a
# message and exits if the file does not exist; dies if it cannot be opened.
sub
_readFileOrExit
{
  my $fileName = shift;

  if (!(-e $fileName))
  {
    print "$fileName does not exist!\n";
    exit;
  }

  # Three-argument open with a lexical handle: the name is never parsed for a
  # mode, and a failed open is reported instead of yielding an empty string.
  open (my $fh, "<", $fileName) or die "Failed to open $fileName: $!, stopped";
  local $/;
  my $contents = <$fh>;
  close ($fh);

  return defined $contents ? $contents : "";
}

# Inserts a new row into the "ntuple" table (when $id is negative) or updates
# the row with the given id (when $id is positive).
#
# Parameters, in order:
#   $id              - id of the row to update, or a negative number to insert
#   $dataset         - dataset name
#   $user            - user name stored on insert
#   $format          - ntuple format
#   $location        - directory containing the ntuples
#   $fileListName    - file whose contents are stored in the fileList column
#   $status          - status string
#   $comment         - free-form comment
#   $psetName        - file whose contents are stored in the pset column
#   $crabCfgName     - file whose contents are stored in the crabCfg column
#   $jsonFileName    - file whose contents are stored in the jsonFile column
#   $lumiSummaryName - file whose contents are stored in the lumiSummary column
#   $release         - name of an existing row in ntupleRelease; stored in the
#                      version column
#
# On update, only values that are non-empty are written; lastUpdateTime is
# always refreshed.  Exits the script if a named file does not exist or the
# release is unknown.
#
# Returns whatever the final insert/update query returns.
sub
dbUpdate
{
  my ($id, $dataset, $user, $format, $location, $fileListName, $status, $comment, $psetName, $crabCfgName, $jsonFileName, $lumiSummaryName, $release) = @_;

  # Load the contents of every file that was named; the others stay undef.
  my $fileList = $fileListName ? _readFileOrExit ($fileListName) : undef;
  my $pset = $psetName ? _readFileOrExit ($psetName) : undef;
  my $crabCfg = $crabCfgName ? _readFileOrExit ($crabCfgName) : undef;
  my $jsonFile = $jsonFileName ? _readFileOrExit ($jsonFileName) : undef;
  my $lumiSummary = $lumiSummaryName ? _readFileOrExit ($lumiSummaryName) : undef;

  # The release must match exactly one row of ntupleRelease.
  if ($release)
  {
    my $escapedRelease = addSlashes ($release);
    my $query = "select id from ntupleRelease where name='$escapedRelease'";
    $db->selectdb ("ntuple");
    my $results = $db->query ($query) or die "Failed to query the ntupleRelease table, stopped";
    if ($results->numrows () != 1)
    {
      print "Ntuple release \"$release\" not found!\n";
      exit;
    }
  }

  # Needs the unescaped values, so this runs before the escaping below.
  my ($nFiles, $size) = sizeOfDataset ($dataset, $location, $fileList);

  $dataset = addSlashes ($dataset);
  $user = addSlashes ($user);
  $format = addSlashes ($format);
  $pset = addSlashes ($pset);
  $crabCfg = addSlashes ($crabCfg);
  $jsonFile = addSlashes ($jsonFile);
  $lumiSummary = addSlashes ($lumiSummary);
  $location = addSlashes ($location);
  $fileList = addSlashes ($fileList);
  $status = addSlashes ($status);
  $comment = addSlashes ($comment);
  $release = addSlashes ($release);

  my $query;
  $db->selectdb ("ntuple");
  if ($id < 0)
  {
    # New entry: take the next id after the largest one in use.
    my $results = $db->query ("select max(id) from ntuple") or die "Failed to query the ntuple table, stopped";
    my @row = $results->fetchrow ();
    my $newId = 1;
    $newId = $row[0] + 1 if $results->numrows ();

    $query = "insert into ntuple (id, dataset, creationTime, lastUpdateTime, user, format, location, fileList, nFiles, sizeInGB, status, comment, pset, crabCfg, jsonFile, lumiSummary, version) values ($newId, '$dataset', now(), now(), '$user', '$format', '$location', '$fileList', $nFiles, $size, '$status', '$comment', '$pset', '$crabCfg', '$jsonFile', '$lumiSummary', '$release')";
  }
  elsif ($id > 0)
  {
    # Existing entry: touch only the columns for which a value was supplied.
    my @assignments;
    push (@assignments, "dataset='$dataset'") if $dataset;
    push (@assignments, "lastUpdateTime=now()");
    push (@assignments, "format='$format'") if $format;
    push (@assignments, "location='$location'") if $location;
    push (@assignments, "fileList='$fileList'") if $fileList;
    push (@assignments, "nFiles=$nFiles") if $nFiles;
    push (@assignments, "sizeInGB=$size") if $size;
    push (@assignments, "status='$status'") if $status;
    push (@assignments, "comment='$comment'") if $comment;
    push (@assignments, "pset='$pset'") if $pset;
    push (@assignments, "crabCfg='$crabCfg'") if $crabCfg;
    push (@assignments, "jsonFile='$jsonFile'") if $jsonFile;
    push (@assignments, "lumiSummary='$lumiSummary'") if $lumiSummary;
    push (@assignments, "version='$release'") if $release;

    $query = "update ntuple set " . join (", ", @assignments) . " where id=$id";
  }
  my $results = $db->query ($query);

  return $results;
}
|
293 |
|
294 |
# Deletes the row with the given id from the "ntuple" table.
#
# Parameters: the id of the row to remove.  A negative id is ignored and
# nothing is sent to the database.
sub
dbDelete
{
  my ($entryId) = @_;

  if ($entryId < 0)
  {
    return;
  }

  $db->selectdb ("ntuple");
  my $results = $db->query ("delete from ntuple where id=$entryId");
}
|
304 |
|
305 |
# Prints a usage page and terminates the script.
#
# Parameters: the command whose help page is wanted.  When it is missing or is
# not one of the known commands, the general usage page is printed instead.
#
# Never returns: exit is called after the page has been printed.
sub
printHelp
{
  my $command = shift;

  # Name of the running script without its leading directories.
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  my $blank = "\n";
  my $optionsHeader = "Mandatory arguments to long options are mandatory for short options too.\n";

  # Option rows that appear on more than one page.
  my $crabCfgRow = [" -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs"];
  my $commentRow = [" -c, --comment COMMENT", "comment for the database entry"];
  my $formatDefaultRow = [" -f, --format FORMAT", "ntuple format (default: BEAN)"];
  my $jsonFileRow = [" -j, --jsonFile FILE", "JSON file used for this dataset"];
  my $locationRow = [" -l, --location DIRECTORY", "directory containing the ntuples"];
  my $pyConfigRow = [" -p, --pyConfig FILE", "Python config used to produce ntuples"];
  my $releaseRow = [" -r, --release NAME", "ntuple release used to produce ntuples"];
  my $lumiSummaryRow = [" -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB"];

  # Each page is a list of entries: a plain string is printed as is, while an
  # array reference is a two-column row printed through the shared format.
  my %pages = (
    "create" => [
      "Usage: $exeName -l DIRECTORY [OPTION]... create NAME\n",
      "Creates an entry in the database for dataset NAME. The ntuples are assumed to\n",
      "be present on the Tier 3, with the status being set accordingly.\n",
      $blank,
      $optionsHeader,
      $crabCfgRow,
      $commentRow,
      $formatDefaultRow,
      $jsonFileRow,
      $locationRow,
      $pyConfigRow,
      $releaseRow,
      $lumiSummaryRow,
    ],
    "update" => [
      "Usage: $exeName [OPTION]... update NAME\n",
      "Updates an existing database entry for dataset NAME.\n",
      $blank,
      $optionsHeader,
      $crabCfgRow,
      $commentRow,
      [" -f, --format FORMAT", "ntuple format"],
      $jsonFileRow,
      $locationRow,
      $pyConfigRow,
      $releaseRow,
      $lumiSummaryRow,
    ],
    "finish" => [
      "Usage: $exeName -l DIRECTORY finish NAME\n",
      "Finalizes the database entry for dataset NAME, changing its status to\n",
      "\"present\". This is intended to be the final step in command-line based ntuple\n",
      "production.\n",
      $blank,
      $optionsHeader,
      $locationRow,
    ],
    "deleteEntry" => [
      "Usage: $exeName deleteEntry NAME\n",
      "Deletes the database entry for dataset NAME. This is intended primarily for\n",
      "mistaken database entries. If you wish to actually delete a set of ntuples,\n",
      "please use the \"deprecate\" command instead.\n",
    ],
    "deprecate" => [
      "Usage: $exeName deprecate NAME\n",
      "Marks the dataset NAME for deletion. WARNING: The dataset will be deleted from\n",
      "the Tier 3 within one week of this action. If you wish to simply delete a\n",
      "mistaken database entry, use the \"deleteEntry\" command instead.\n",
    ],
    "uploadRelease" => [
      "Usage: $exeName -p FILE -r DIRECTORY [OPTION]... uploadRelease NAME\n",
      "Copies an ntuple release to the appropriate area on the Tier 3, and creates a\n",
      "database entry for it, with NAME being the name of the release. An ntuple\n",
      "release is a CMSSW release with all the necessary packages added for creating\n",
      "ntuples, along with a default Python config file.\n",
      $blank,
      $optionsHeader,
      $commentRow,
      $formatDefaultRow,
      [" -p, --pyConfig FILE", "default Python config for the release"],
      [" -r, --release DIRECTORY", "CMSSW release containing ntuple packages"],
    ],
  );

  # Page shown when no known command was named.
  my @generalPage = (
    "Usage: $exeName [OPTION]... COMMAND NAME\n",
    "Manipulates entry in the OSU Tier 3 ntuple database given by NAME.\n",
    $blank,
    $optionsHeader,
    [" -h, --help", "print help. If COMMAND is present, print help"],
    [" ", "specific to COMMAND."],
    $blank,
    "COMMAND may be one of the following:\n",
    [" create", "creates the entry"],
    [" update", "updates the entry"],
    [" finish", "finalizes the database entry"],
    [" deleteEntry", "removes the database entry"],
    [" deprecate", "marks the dataset for deletion"],
    [" uploadRelease", "upload an ntuple release"],
  );

  my $page = \@generalPage;
  $page = $pages{$command} if defined $command && exists $pages{$command};

  foreach my $entry (@{$page})
  {
    if (ref ($entry))
    {
      printf "%-29s%s\n", $entry->[0], $entry->[1];
    }
    else
    {
      print "$entry";
    }
  }

  exit;
}
|
403 |
|
404 |
# Returns the number at the start of the output of "du -sk" for the given
# path, or 0 when du could not be started or printed no leading number.
sub
_diskUsageInKB
{
  my $path = shift;

  # List-form pipe open: the path reaches du as a single argument without
  # passing through a shell.  "--" keeps a leading "-" from being read as an
  # option, and -k fixes the unit to kilobytes.
  open (my $du, "-|", "du", "-sk", "--", $path) or return 0;
  my $output = <$du>;
  close ($du);

  return (defined $output && $output =~ m/^(\d+)/) ? $1 : 0;
}

# Works out how many files a dataset has and how much disk space they use.
#
# Parameters:
#   $dataset  - dataset name, used to look up the nFiles value already stored
#   $location - directory containing the ntuples (may be empty)
#   $fileList - newline-separated list of file paths (may be empty)
#
# When a file list is given, its entries are counted and measured one by one.
# Otherwise the location is measured, but only if the stored nFiles for the
# dataset is less than 1.  In every other case (0, 0) is returned.
#
# Returns (nFiles, size), where size is the du total divided by 1024 * 1024
# and formatted with two decimals.  Exits if the location does not exist.
sub
sizeOfDataset
{
  my $dataset = shift;
  my $location = shift;
  my $fileList = shift;

  my $escapedDataset = addSlashes ($dataset);
  my $query = "select nFiles from ntuple where dataset='$escapedDataset'";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query) or die "Failed to query the ntuple table, stopped";
  my @row = $results->fetchrow ();
  my $previousNFiles = 0;
  $previousNFiles = $row[0] if $results->numrows ();

  my $size = 0.0;
  my $nFiles = 0;
  if ($location && !$fileList && $previousNFiles < 1)
  {
    if (!(-e $location))
    {
      print "$location does not exist!\n";
      exit;
    }

    if (-d $location)
    {
      # Count the entries that "ls" would list, i.e. skip names starting
      # with a dot.
      if (opendir (my $dir, $location))
      {
        $nFiles = grep { !m/^\./ } readdir ($dir);
        closedir ($dir);
      }
      else
      {
        warn "Failed to read $location: $!\n";
      }
    }
    else
    {
      # A plain file counts as a single entry.
      $nFiles = 1;
    }

    $size = _diskUsageInKB ($location);
    $size /= 1024 * 1024;
    $size = sprintf "%.2f", $size;
  }
  elsif ($fileList)
  {
    foreach my $file (split (/\n/, $fileList))
    {
      # A blank line is not a file, and would make du measure the current
      # directory.
      next if $file =~ m/^\s*$/;

      $nFiles++;
      $size += _diskUsageInKB ($file);
    }
    $size /= 1024 * 1024;
    $size = sprintf "%.2f", $size;
  }

  return ($nFiles, $size);
}
|
448 |
|
449 |
# Moves a packed ntuple release into the releases area and records it in the
# "ntupleRelease" table.
#
# Parameters, in order:
#   $format       - ntuple format
#   $cmsswRelease - value stored in the cmsswRelease column
#   $release      - path of the tarball to move
#   $psetName     - file whose contents are stored in the pset column
#   $name         - name of the release; also names the moved tarball
#   $user         - user name stored with the entry
#   $comment      - free-form comment
#
# The config file is checked and read before the tarball is moved, so a
# missing config can no longer leave an uploaded tarball without a database
# row.  Exits if the config file does not exist; dies if it cannot be opened
# or the tarball cannot be moved.
#
# Returns whatever the insert query returns.
sub
uploadRelease
{
  my ($format, $cmsswRelease, $release, $psetName, $name, $user, $comment) = @_;

  if (!(-e $psetName))
  {
    print "$psetName does not exist!\n";
    exit;
  }
  open (my $pyConfig, "<", $psetName) or die "Failed to open $psetName: $!, stopped";
  my $pset = join ("", <$pyConfig>);
  close ($pyConfig);

  move ($release, "/home/hart/public_html/releases/$name.tar.gz") or die "Ntuple releases may only be uploaded on the Tier 3, stopped";

  # New entry: take the next id after the largest one in use.
  $db->selectdb ("ntuple");
  my $maxIdResults = $db->query ("select max(id) from ntupleRelease") or die "Failed to query the ntupleRelease table, stopped";
  my @row = $maxIdResults->fetchrow ();
  my $id = 1;
  $id = $row[0] + 1 if $maxIdResults->numrows ();

  $name = addSlashes ($name);
  $pset = addSlashes ($pset);
  $user = addSlashes ($user);
  $format = addSlashes ($format);
  $cmsswRelease = addSlashes ($cmsswRelease);
  $comment = addSlashes ($comment);

  my $query = "insert into ntupleRelease (id, name, pset, user, pending, format, cmsswRelease, comment) values ($id, '$name', '$pset', '$user', 1, '$format', '$cmsswRelease', '$comment')";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);

  return $results;
}
|