1 |
#!/usr/bin/env perl
|
2 |
|
3 |
use strict;
|
4 |
use Mysql;
|
5 |
use Getopt::Long;
|
6 |
use File::Copy;
|
7 |
|
8 |
sub getDataset;
|
9 |
sub dbUpdate;
|
10 |
sub dbDelete;
|
11 |
sub addSlashes;
|
12 |
sub uploadRelease;
|
13 |
|
14 |
# Handle to the Tier 3 MySQL server, shared with the subroutines below.  It is
# declared here but only connected once the command line has been validated,
# so that asking for help or mistyping a command needs no database access.
our $db;

my %opt;
Getopt::Long::Configure ("bundling");
# An unrecognized option makes GetOptions return false; show the general help
# instead of carrying on with a partially parsed command line.
printHelp () if !GetOptions (\%opt, "comment|c=s", "format|f=s", "pyConfig|p=s", "crabCfg|b=s", "jsonFile|j=s", "lumiSummary|s=s", "location|l=s", "fileList|t=s", "release|r=s", "help|h");
my $argc = @ARGV;

# After option parsing exactly two words must remain: COMMAND and NAME.
my %isCommand = map { $_ => 1 } ("create", "update", "finish", "deprecate", "uploadRelease", "deleteEntry");
printHelp ($ARGV[0]) if $opt{"help"};
printHelp () if $argc != 2;
printHelp () if !$isCommand{$ARGV[0]};
if (($ARGV[0] eq "create" || $ARGV[0] eq "finish") && !$opt{"location"})
{
  print "The directory containing the ntuples must be given!\n";
  exit;
}
# Both options are required, so complain as soon as either one is missing.
if ($ARGV[0] eq "uploadRelease" && (!$opt{"pyConfig"} || !$opt{"release"}))
{
  print "Both the Python and the CMSSW release must be given!\n";
  exit;
}

$db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";

if ($ARGV[0] ne "uploadRelease")
{
  my $id = -1;
  my $fullDataset;
  ($id, $fullDataset) = getDataset ($ARGV[0], $ARGV[1]) if $ARGV[0] ne "create";
  # A negative id means no existing entry was selected: fall back to creating
  # a new entry named exactly as given on the command line.
  $fullDataset = $ARGV[1] if $id < 0;
  $ARGV[0] = "create" if $id < 0;

  if ($ARGV[0] eq "deleteEntry")
  {
    # The row is about to be removed, so there is nothing worth updating.
    dbDelete ($id);
  }
  else
  {
    my $status = "present";
    $status = "" if $ARGV[0] eq "update";  # an update leaves the status alone
    $status = "deprecated" if $ARGV[0] eq "deprecate";
    $opt{"format"} = "BEAN" if $ARGV[0] eq "create" && !$opt{"format"};
    dbUpdate ($id, $fullDataset, $ENV{"USER"}, $opt{"format"}, $opt{"location"}, $opt{"fileList"}, $status, $opt{"comment"}, $opt{"pyConfig"}, $opt{"crabCfg"}, $opt{"jsonFile"}, $opt{"lumiSummary"}, $opt{"release"});
  }
}
else
{
  # Directory part of the release path: the CMSSW_* component is removed and
  # repeated slashes are collapsed.
  my $parentDir = "./$opt{'release'}";
  $parentDir =~ s/CMSSW_[^\/]*//g;
  $parentDir =~ s/\/\/*/\//g;
  # Bare CMSSW_* component of the release path.
  my $cmsswRelease = $opt{'release'};
  $cmsswRelease =~ s/^.*CMSSW_([^\/]*).*$/CMSSW_$1/;
  # List-form system bypasses the shell, so spaces or metacharacters in the
  # arguments cannot alter the command.
  # NOTE(review): the archive member is the path as typed, resolved relative to
  # $parentDir; for a release outside the current directory $cmsswRelease looks
  # like the intended member -- confirm before changing.
  system ("tar", "-C", $parentDir, "-czf", "$ARGV[1].tar.gz", $opt{"release"}) == 0 or die "Failed to create $ARGV[1].tar.gz, stopped";
  $opt{"format"} = "BEAN" if !$opt{"format"};
  uploadRelease ($opt{"format"}, $cmsswRelease, "$ARGV[1].tar.gz", $opt{"pyConfig"}, $ARGV[1], $ENV{"USER"}, $opt{"comment"});
}
|
59 |
|
60 |
# Escapes a string so that it can be placed inside a quoted SQL literal.
#
# Backslashes, single quotes and double quotes get a backslash in front of
# them, and a NUL byte becomes the two characters backslash and "0".  An
# undefined argument is returned unchanged.
#
# Takes the string to escape and returns the escaped copy.
sub
addSlashes
{
  my $string = shift;

  return $string if !defined $string;

  # Backslashes go first so that the backslashes added below are not doubled.
  $string =~ s/\\/\\\\/g;
  $string =~ s/'/\\'/g;
  $string =~ s/"/\\"/g;
  # Match the NUL byte itself, not a backslash followed by the digit 0.
  $string =~ s/\0/\\0/g;

  return $string;
}
|
72 |
|
73 |
# Looks up the database entry matching a (possibly partial) dataset name.
#
# Arguments:
#   $command - the command being run; "deleteEntry" and "deprecate" never
#              offer to create a new entry
#   $dataset - dataset name to search for; "*" acts as a wildcard and the
#              name may match anywhere inside the stored name
#
# Returns the list (id, full dataset name) of the chosen entry, or (-1, "")
# when the user chose to create a new entry.  With several matches the user
# is asked to pick one.  The script exits when nothing usable is selected.
sub
getDataset
{
  my $command = shift;
  my $dataset = shift;

  my $allowCreate = $command ne "deleteEntry" && $command ne "deprecate";

  # Escape before building the pattern so that quotes in the name cannot
  # break out of the SQL string literal.
  my $queryDataset = addSlashes ($dataset);
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";
  my $query = "select id,dataset,user,creationTime from ntuple where dataset like '$queryDataset' order by lastUpdateTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query) or die "Failed to query the ntuple database, stopped";
  my $nMatches = $results->numrows ();

  if ($nMatches == 1)
  {
    my @row = $results->fetchrow ();
    return ($row[0], $row[1]);
  }
  if ($nMatches == 0)
  {
    if (!$allowCreate)
    {
      print "Database entry does not exist.\n";
      exit;
    }
    print "Database entry does not exist. Create it? (Y/n): ";
    my $response = <STDIN>;
    $response =~ s/\n//g;
    $response = "y" if !$response;  # just pressing return means yes
    exit if substr (lc ($response), 0, 1) ne 'y';
    return (-1, "");
  }

  # Several matches: list them and let the user choose by number.
  my %id;
  my %fullDataset;
  print "Found multiple database entries matching\n";
  print "\"$dataset\":\n";
  print "( 0) new\n" if $allowCreate;
  for (my $i = 1; $i <= $nMatches; $i++)
  {
    my @row = $results->fetchrow ();
    $id{"$i"} = $row[0];
    $fullDataset{"$i"} = $row[1];
    # The name is passed as an argument, never as part of the format, so a
    # "%" inside a dataset name is printed literally.
    printf "(%2d) %s\n", $i, $row[1];
    print " created by $row[2] on $row[3]\n";
  }
  print "\nWhich entry would you like to modify?";
  print $allowCreate ? " (Select 0 to create a new entry): " : ": ";
  my $response = <STDIN>;
  $response = "" if !defined $response;
  $response =~ s/[ \t\n]//g;
  # Only an explicit zero creates a new entry; empty or non-numeric input
  # falls through to the "not a valid option" message below.
  return (-1, "") if $allowCreate && $response =~ m/^0+$/;
  if (!(exists $id{$response}))
  {
    print "Your selection \"$response\" was not a valid option! Quitting.\n";
    exit;
  }

  return ($id{$response}, $fullDataset{$response});
}
|
141 |
|
142 |
# Returns the whole content of the file called $name as a single string.
# Prints a message and exits when the file does not exist; dies when it
# exists but cannot be opened.
sub
slurpFileOrExit
{
  my $name = shift;

  if (!(-e $name))
  {
    print "$name does not exist!\n";
    exit;
  }
  open (my $handle, "<", $name) or die "Failed to open $name: $!, stopped";
  my $content = join ("", <$handle>);
  close ($handle);

  return $content;
}

# Creates or updates a row of the "ntuple" table.
#
# Arguments, in order:
#   $id              - id of the row to update; a negative value inserts a
#                      new row with the next free id
#   $dataset         - dataset name
#   $user            - user name stored on creation
#   $format          - ntuple format
#   $location        - directory containing the ntuples; stored as an
#                      absolute path (relative paths are prefixed with $PWD)
#   $fileListName    - file listing the ntuple files, one per line
#   $status          - status string
#   $comment         - free-form comment
#   $psetName        - Python config file
#   $crabCfgName     - CRAB config file
#   $jsonFileName    - JSON file
#   $lumiSummaryName - lumi summary file
#   $release         - name of an ntuple release, which must already exist in
#                      the "ntupleRelease" table
#
# The content of each named file, not its name, is stored.  When updating,
# only the fields with a true value are written, plus lastUpdateTime.  The
# script exits if a named file or the release is missing.
#
# Returns whatever the database handle's query method returns for the
# insert/update statement, or nothing when $id is 0.
sub
dbUpdate
{
  my $id = shift;
  my $dataset = shift;
  my $user = shift;
  my $format = shift;
  my $location = shift;
  my $fileListName = shift;
  my $status = shift;
  my $comment = shift;
  my $psetName = shift;
  my $crabCfgName = shift;
  my $jsonFileName = shift;
  my $lumiSummaryName = shift;
  my $release = shift;

  my $fileList;
  my $pset;
  my $crabCfg;
  my $jsonFile;
  my $lumiSummary;
  $fileList = slurpFileOrExit ($fileListName) if $fileListName;
  $pset = slurpFileOrExit ($psetName) if $psetName;
  $crabCfg = slurpFileOrExit ($crabCfgName) if $crabCfgName;
  $jsonFile = slurpFileOrExit ($jsonFileName) if $jsonFileName;
  $lumiSummary = slurpFileOrExit ($lumiSummaryName) if $lumiSummaryName;

  if ($release)
  {
    # Escape the name here as well: it goes into a quoted SQL literal.
    my $escapedRelease = addSlashes ($release);
    my $releaseQuery = "select id from ntupleRelease where name='$escapedRelease'";
    $db->selectdb ("ntuple");
    my $found = $db->query ($releaseQuery);
    if (!$found || $found->numrows () != 1)
    {
      print "Ntuple release \"$release\" not found!\n";
      exit;
    }
  }

  # Measure the dataset with the raw values, before they are escaped.
  my ($nFiles, $size) = sizeOfDataset ($dataset, $location, $fileList);
  my $fullLocation = $location;
  $fullLocation = "$ENV{'PWD'}/$location" if !($location =~ m/^\//);

  $dataset = addSlashes ($dataset);
  $user = addSlashes ($user);
  $format = addSlashes ($format);
  $pset = addSlashes ($pset);
  $crabCfg = addSlashes ($crabCfg);
  $jsonFile = addSlashes ($jsonFile);
  $lumiSummary = addSlashes ($lumiSummary);
  $fullLocation = addSlashes ($fullLocation);
  $fileList = addSlashes ($fileList);
  $status = addSlashes ($status);
  $comment = addSlashes ($comment);
  $release = addSlashes ($release);

  my $query;
  if ($id < 0)
  {
    # New entry: take the next id after the current maximum.
    $db->selectdb ("ntuple");
    my $maxResults = $db->query ("select max(id) from ntuple");
    my @row = $maxResults->fetchrow ();
    my $newId = 1;
    $newId = $row[0] + 1 if $maxResults->numrows ();

    $query = "insert into ntuple (id, dataset, creationTime, lastUpdateTime, user, format, location, fileList, nFiles, sizeInGB, status, comment, pset, crabCfg, jsonFile, lumiSummary, version) values ($newId, '$dataset', now(), now(), '$user', '$format', '$fullLocation', '$fileList', $nFiles, $size, '$status', '$comment', '$pset', '$crabCfg', '$jsonFile', '$lumiSummary', '$release')";
  }
  elsif ($id > 0)
  {
    # Existing entry: only touch the fields that were actually supplied.
    my @assignments;
    push (@assignments, "dataset='$dataset'") if $dataset;
    push (@assignments, "lastUpdateTime=now()");
    push (@assignments, "format='$format'") if $format;
    push (@assignments, "location='$fullLocation'") if $location;
    push (@assignments, "fileList='$fileList'") if $fileList;
    push (@assignments, "nFiles=$nFiles") if $nFiles;
    push (@assignments, "sizeInGB=$size") if $size;
    push (@assignments, "status='$status'") if $status;
    push (@assignments, "comment='$comment'") if $comment;
    push (@assignments, "pset='$pset'") if $pset;
    push (@assignments, "crabCfg='$crabCfg'") if $crabCfg;
    push (@assignments, "jsonFile='$jsonFile'") if $jsonFile;
    push (@assignments, "lumiSummary='$lumiSummary'") if $lumiSummary;
    push (@assignments, "version='$release'") if $release;

    $query = "update ntuple set " . join (", ", @assignments) . " where id=$id";
  }

  # An id of exactly 0 selects neither branch; there is nothing to run then.
  return if !defined $query;

  my $results = $db->query ($query);

  return $results;
}
|
294 |
|
295 |
# Removes the row with the given id from the "ntuple" table.  A negative id
# is ignored.
sub
dbDelete
{
  my ($entryId) = @_;

  return if $entryId < 0;

  $db->selectdb ("ntuple");
  my $statement = "delete from ntuple where id=$entryId";
  my $outcome = $db->query ($statement);
}
|
305 |
|
306 |
# Prints the usage message and exits.
#
# $command selects the help text of one specific command ("create", "update",
# "finish", "deleteEntry", "deprecate" or "uploadRelease"); any other value,
# including none at all, prints the general overview.
sub
printHelp
{
  my $command = shift;

  # Name the script was invoked as, without leading directories.
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  my $mandatory = "Mandatory arguments to long options are mandatory for short options too.";

  # Each help text is a list of output lines.  A plain string is printed as
  # is; an array reference holds the two columns of an aligned table row.
  my %helpFor = (
    "create" => [
      "Usage: $exeName -l DIRECTORY [OPTION]... create NAME",
      "Creates an entry in the database for dataset NAME. The ntuples are assumed to",
      "be present on the Tier 3, with the status being set accordingly.",
      "",
      $mandatory,
      [" -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs"],
      [" -c, --comment COMMENT", "comment for the database entry"],
      [" -f, --format FORMAT", "ntuple format (default: BEAN)"],
      [" -j, --jsonFile FILE", "JSON file used for this dataset"],
      [" -l, --location DIRECTORY", "directory containing the ntuples"],
      [" -p, --pyConfig FILE", "Python config used to produce ntuples"],
      [" -r, --release NAME", "ntuple release used to produce ntuples"],
      [" -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB"],
    ],
    "update" => [
      "Usage: $exeName [OPTION]... update NAME",
      "Updates an existing database entry for dataset NAME.",
      "",
      $mandatory,
      [" -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs"],
      [" -c, --comment COMMENT", "comment for the database entry"],
      [" -f, --format FORMAT", "ntuple format"],
      [" -j, --jsonFile FILE", "JSON file used for this dataset"],
      [" -l, --location DIRECTORY", "directory containing the ntuples"],
      [" -p, --pyConfig FILE", "Python config used to produce ntuples"],
      [" -r, --release NAME", "ntuple release used to produce ntuples"],
      [" -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB"],
    ],
    "finish" => [
      "Usage: $exeName -l DIRECTORY finish NAME",
      "Finalizes the database entry for dataset NAME, changing its status to",
      "\"present\". This is intended to be the final step in command-line based ntuple",
      "production.",
      "",
      $mandatory,
      [" -l, --location DIRECTORY", "directory containing the ntuples"],
    ],
    "deleteEntry" => [
      "Usage: $exeName deleteEntry NAME",
      "Deletes the database entry for dataset NAME. This is intended primarily for",
      "mistaken database entries. If you wish to actually delete a set of ntuples,",
      "please use the \"deprecate\" command instead.",
    ],
    "deprecate" => [
      "Usage: $exeName deprecate NAME",
      "Marks the dataset NAME for deletion. WARNING: The dataset will be deleted from",
      "the Tier 3 within one week of this action. If you wish to simply delete a",
      "mistaken database entry, use the \"deleteEntry\" command instead.",
    ],
    "uploadRelease" => [
      "Usage: $exeName -p FILE -r DIRECTORY [OPTION]... uploadRelease NAME",
      "Copies an ntuple release to the appropriate area on the Tier 3, and creates a",
      "database entry for it, with NAME being the name of the release. An ntuple",
      "release is a CMSSW release with all the necessary packages added for creating",
      "ntuples, along with a default Python config file.",
      "",
      $mandatory,
      [" -c, --comment COMMENT", "comment for the database entry"],
      [" -f, --format FORMAT", "ntuple format (default: BEAN)"],
      [" -p, --pyConfig FILE", "default Python config for the release"],
      [" -r, --release DIRECTORY", "CMSSW release containing ntuple packages"],
    ],
  );

  my @generalHelp = (
    "Usage: $exeName [OPTION]... COMMAND NAME",
    "Manipulates entry in the OSU Tier 3 ntuple database given by NAME.",
    "",
    $mandatory,
    [" -h, --help", "print help. If COMMAND is present, print help"],
    [" ", "specific to COMMAND."],
    "",
    "COMMAND may be one of the following:",
    [" create", "creates the entry"],
    [" update", "updates the entry"],
    [" finish", "finalizes the database entry"],
    [" deleteEntry", "removes the database entry"],
    [" deprecate", "marks the dataset for deletion"],
    [" uploadRelease", "upload an ntuple release"],
  );

  my $lines = \@generalHelp;
  $lines = $helpFor{$command} if defined $command && exists $helpFor{$command};

  foreach my $line (@$lines)
  {
    if (ref ($line))
    {
      printf "%-29s%s\n", @$line;
    }
    else
    {
      print "$line\n";
    }
  }

  exit;
}
|
404 |
|
405 |
# Returns the disk usage of $path in kilobytes as reported by "du -sk", or 0
# when du produces no usable output.  du is run without a shell, so the path
# may contain spaces or shell metacharacters.
sub
diskUsageInKB
{
  my $path = shift;

  open (my $du, "-|", "du", "-sk", "--", $path) or return 0;
  my $line = <$du>;
  close ($du);

  return 0 if !defined $line;
  # du prints "<size><whitespace><path>"; keep only the leading number.
  return $line =~ m/^(\d+)/ ? $1 : 0;
}

# Counts the files of a dataset and measures their total size.
#
# Arguments:
#   $dataset  - dataset name (not used for the calculation)
#   $location - directory containing the ntuples; used when no file list is
#               given
#   $fileList - newline-separated list of file names; takes precedence over
#               $location
#
# Returns the list (number of files, size).  The size is the du total in
# kilobytes divided by 1024 * 1024 (i.e. gigabytes) and formatted with two
# decimals; with neither a location nor a file list both values are 0.
# Prints a message and exits when $location is used but does not exist.
sub
sizeOfDataset
{
  my $dataset = shift;
  my $location = shift;
  my $fileList = shift;

  my $size = 0.0;
  my $nFiles = 0;
  if ($location && !$fileList)
  {
    if (!(-e $location))
    {
      print "$location does not exist!\n";
      exit;
    }
    if (!(-d $location))
    {
      # A single file counts as one entry, as "ls FILE" would list it.
      $nFiles = 1;
    }
    elsif (opendir (my $dir, $location))
    {
      # Count like a plain "ls": entries starting with a dot are skipped.
      $nFiles = grep { !m/^\./ } readdir ($dir);
      closedir ($dir);
    }
    $size = sprintf "%.2f", diskUsageInKB ($location) / (1024 * 1024);
  }
  elsif ($fileList)
  {
    foreach my $file (split (/\n/, $fileList))
    {
      next if $file =~ m/^\s*$/;  # ignore blank lines in the list
      $nFiles++;
      $size += diskUsageInKB ($file);
    }
    $size = sprintf "%.2f", $size / (1024 * 1024);
  }

  return ($nFiles, $size);
}
|
442 |
|
443 |
# Publishes an ntuple release tarball and records it in the "ntupleRelease"
# table.
#
# Arguments, in order:
#   $format       - ntuple format
#   $cmsswRelease - name of the underlying CMSSW release
#   $release      - path of the release tarball to move into place
#   $psetName     - default Python config file; its content is stored
#   $name         - name of the ntuple release
#   $user         - user uploading the release
#   $comment      - free-form comment
#
# Prints a message and exits when the Python config does not exist; dies when
# it cannot be read, when the tarball cannot be moved, or when the id lookup
# fails.  Returns whatever the database handle's query method returns for the
# insert statement.
sub
uploadRelease
{
  my $format = shift;
  my $cmsswRelease = shift;
  my $release = shift;
  my $psetName = shift;
  my $name = shift;
  my $user = shift;
  my $comment = shift;

  # Read the config before touching the release area, so that a missing file
  # does not leave an uploaded tarball without a database entry.
  if (!(-e $psetName))
  {
    print "$psetName does not exist!\n";
    exit;
  }
  open (my $pyConfig, "<", $psetName) or die "Failed to open $psetName: $!, stopped";
  my $pset = join ("", <$pyConfig>);
  close ($pyConfig);

  move ($release, "/home/hart/public_html/releases/$name.tar.gz") or die "Ntuple releases may only be uploaded on the Tier 3, stopped";

  # The new row gets the next id after the current maximum.
  $db->selectdb ("ntuple");
  my $maxResults = $db->query ("select max(id) from ntupleRelease") or die "Failed to query the ntuple database, stopped";
  my @row = $maxResults->fetchrow ();
  my $id = 1;
  $id = $row[0] + 1 if $maxResults->numrows ();

  $name = addSlashes ($name);
  $pset = addSlashes ($pset);
  $user = addSlashes ($user);
  $format = addSlashes ($format);
  $cmsswRelease = addSlashes ($cmsswRelease);
  $comment = addSlashes ($comment);

  my $query = "insert into ntupleRelease (id, name, pset, user, pending, format, cmsswRelease, comment) values ($id, '$name', '$pset', '$user', 1, '$format', '$cmsswRelease', '$comment')";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);

  return $results;
}
|