ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osudb
Revision: 1.10
Committed: Wed Oct 17 18:28:47 2012 UTC (12 years, 6 months ago) by ahart
Branch: MAIN
Changes since 1.9: +4 -4 lines
Log Message:
Append the hostname to the lastUpdateUser field.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Mysql;
5 use Getopt::Long;
6 use File::Copy;
7
8 sub getDataset;
9 sub dbUpdate;
10 sub dbUpdateOther;
11 sub dbDelete;
12 sub dbDeleteOther;
13 sub addSlashes;
14 sub uploadRelease;
15 sub datasetExists;
16
17 our $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";
18
19 my %opt;
20 Getopt::Long::Configure ("bundling");
21 GetOptions (\%opt, "comment|c=s", "format|f=s", "pyConfig|p=s", "crabCfg|b=s", "jsonFile|j=s", "lumiSummary|s=s", "location|l=s", "fileList|t=s", "release|r=s", "other|o", "help|h");
22 my $argc = @ARGV;
23
24 printHelp ($ARGV[0]) if $opt{"help"};
25 printHelp () if $argc != 2 && $ARGV[0] ne "createOther";
26 printHelp () if $ARGV[0] ne "create" && $ARGV[0] ne "createOther" && $ARGV[0] ne "update" && $ARGV[0] ne "finish" && $ARGV[0] ne "deprecate" && $ARGV[0] ne "uploadRelease" && $ARGV[0] ne "deleteEntry";
27 if (($ARGV[0] eq "create" || $ARGV[0] eq "finish") && !$opt{"location"})
28 {
29 print "The directory containing the ntuples must be given!\n";
30 exit;
31 }
32 if ($ARGV[0] eq "uploadRelease" && !$opt{"pyConfig"} && !$opt{"release"})
33 {
34 print "Both the Python and the CMSSW release must be given!\n";
35 exit;
36 }
37 if ($ARGV[0] eq "uploadRelease")
38 {
39 my $parentDir = "./$opt{'release'}";
40 $parentDir =~ s/CMSSW_[^\/]*//g;
41 $parentDir =~ s/\/\/*/\//g;
42 my $cmsswRelease = $opt{'release'};
43 $cmsswRelease =~ s/^.*CMSSW_([^\/]*).*$/CMSSW_$1/;
44 `tar -C $parentDir -czf $ARGV[1].tar.gz $opt{"release"}`;
45 $opt{"format"} = "BEAN" if !$opt{"format"};
46 uploadRelease ($opt{"format"}, $cmsswRelease, "$ARGV[1].tar.gz", $opt{"pyConfig"}, $ARGV[1], "$ENV{'USER'}\@$ENV{'HOSTNAME'}", $opt{"comment"});
47 }
48 elsif ($ARGV[0] eq "createOther")
49 {
50 my @listOfFiles = @ARGV;
51 @listOfFiles = reverse (@listOfFiles);
52 pop (@listOfFiles);
53 @listOfFiles = reverse (@listOfFiles);
54 dbOtherCreate ("$ENV{'USER'}\@$ENV{'HOSTNAME'}", \@listOfFiles, $opt{"comment"});
55 }
56 else
57 {
58 my $id = -1;
59 my $fullDataset;
60 ($id, $fullDataset) = getDataset ($ARGV[0], $ARGV[1]) if $ARGV[0] ne "create" && ($ARGV[0] ne "update" || !$opt{"other"}) && ($ARGV[0] ne "deleteEntry" || !$opt{"other"});
61 $id = $ARGV[1] if ($ARGV[0] eq "update" && $opt{"other"}) || ($ARGV[0] eq "deleteEntry" && $opt{"other"});
62 $fullDataset = $ARGV[1] if $id < 0;
63 $ARGV[0] = "create" if $id < 0;
64 my $status = "present";
65 $status = "" if $ARGV[0] eq "update";
66 $status = "deprecated" if $ARGV[0] eq "deprecate";
67 $opt{"format"} = "BEAN" if $ARGV[0] eq "create" && !$opt{"format"};
68 dbUpdate ($id, $fullDataset, "$ENV{'USER'}\@$ENV{'HOSTNAME'}", $opt{"format"}, $opt{"location"}, $opt{"fileList"}, $status, $opt{"comment"}, $opt{"pyConfig"}, $opt{"crabCfg"}, $opt{"jsonFile"}, $opt{"lumiSummary"}, $opt{"release"}) if !$opt{"other"};
69 dbUpdateOther ($id, $opt{"comment"}, "$ENV{'USER'}\@$ENV{'HOSTNAME'}") if $opt{"other"};
70 dbDelete ($id) if $ARGV[0] eq "deleteEntry" && !$opt{"other"};
71 dbDeleteOther ($id) if $ARGV[0] eq "deleteEntry" && $opt{"other"};
72 }
73
74 sub
75 addSlashes
76 {
77 my $string = shift;
78
79 $string =~ s/\\/\\\\/g;
80 $string =~ s/'/\\'/g;
81 $string =~ s/"/\\"/g;
82 $string =~ s/\\0/\\\\0/g;
83
84 return $string;
85 }
86
87 sub
88 getDataset
89 {
90 my $command = shift;
91 my $dataset = shift;
92
93 my $results;
94 my $queryDataset = $dataset;
95 $queryDataset =~ s/\*/%/g;
96 $queryDataset =~ s/(.*)/%$1%/g;
97 my $query = "select id,dataset,user,creationTime from ntuple where dataset like '$queryDataset' order by creationTime";
98 $db->selectdb ("ntuple");
99 $results = $db->query ($query);
100 if ($results->numrows () == 1)
101 {
102 my @row = $results->fetchrow ();
103 return ($row[0], $row[1]);
104 }
105 if ($results->numrows () == 0)
106 {
107 if ($command ne "deleteEntry" && $command ne "deprecate")
108 {
109 print "Database entry does not exist. Create it? (Y/n): ";
110 my $response = <STDIN>;
111 $response =~ s/\n//g;
112 $response = "y" if !$response;
113 exit if substr (lc ($response), 0, 1) ne 'y';
114 return (-1, "");
115 }
116 else
117 {
118 print "Database entry does not exist.\n";
119 exit;
120 }
121 }
122 my %id;
123 my %fullDataset;
124 print "Found multiple database entries matching\n";
125 print "\"$dataset\":\n";
126 print "( 0) new\n" if $command ne "deleteEntry" && $command ne "deprecate";
127 for (my $i = 1; $i <= $results->numrows (); $i++)
128 {
129 my @row = $results->fetchrow ();
130 $id{"$i"} = $row[0];
131 $fullDataset{"$i"} = $row[1];
132 printf "(%2d) $row[1]\n", $i;
133 print " created by $row[2] on $row[3]\n";
134 }
135 print "\nWhich entry would you like to modify?";
136 if ($command ne "deleteEntry" && $command ne "deprecate")
137 {
138 print " (Select 0 to create a new entry): "
139 }
140 else
141 {
142 print ": ";
143 }
144 my $response = <STDIN>;
145 $response =~ s/[ \t\n]//g;
146 return (-1, "") if $response == 0 && $command ne "deleteEntry" && $command ne "deprecate";
147 if (!(exists $id{$response}))
148 {
149 print "Your selection \"$response\" was not a valid option! Quitting.\n";
150 exit;
151 }
152
153 return ($id{$response}, $fullDataset{$response});
154 }
155
156 sub
157 dbUpdate
158 {
159 my $id = shift;
160 my $dataset = shift;
161 my $user = shift;
162 my $format = shift;
163 my $location = shift;
164 my $fileListName = shift;
165 my $status = shift;
166 my $comment = shift;
167 my $psetName = shift;
168 my $crabCfgName = shift;
169 my $jsonFileName = shift;
170 my $lumiSummaryName = shift;
171 my $release = shift;
172
173 my $fileList;
174 my $pset;
175 my $crabCfg;
176 my $jsonFile;
177 my $lumiSummary;
178 my $size = 0;
179 my $nFiles = 0;
180 if ($fileListName)
181 {
182 if (!(-e $fileListName))
183 {
184 print "$fileListName does not exist!\n";
185 exit;
186 }
187 open (FILE_LIST, "<$fileListName");
188 my @fileList = <FILE_LIST>;
189 close (FILE_LIST);
190 $fileList = join ("", @fileList);
191 }
192 if ($psetName)
193 {
194 if (!(-e $psetName))
195 {
196 print "$psetName does not exist!\n";
197 exit;
198 }
199 open (PY_CONFIG, "<$psetName");
200 my @pset = <PY_CONFIG>;
201 close (PY_CONFIG);
202 $pset = join ("", @pset);
203 }
204 if ($crabCfgName)
205 {
206 if (!(-e $crabCfgName))
207 {
208 print "$crabCfgName does not exist!\n";
209 exit;
210 }
211 open (CRAB_CONFIG, "<$crabCfgName");
212 my @crabCfg = <CRAB_CONFIG>;
213 close (CRAB_CONFIG);
214 $crabCfg = join ("", @crabCfg);
215 }
216 if ($jsonFileName)
217 {
218 if (!(-e $jsonFileName))
219 {
220 print "$jsonFileName does not exist!\n";
221 exit;
222 }
223 open (JSON_FILE, "<$jsonFileName");
224 my @jsonFile = <JSON_FILE>;
225 close (JSON_FILE);
226 $jsonFile = join ("", @jsonFile);
227 }
228 if ($lumiSummaryName)
229 {
230 if (!(-e $lumiSummaryName))
231 {
232 print "$lumiSummaryName does not exist!\n";
233 exit;
234 }
235 open (LUMI_SUMMARY, "<$lumiSummaryName");
236 my @lumiSummary = <LUMI_SUMMARY>;
237 close (LUMI_SUMMARY);
238 $lumiSummary = join ("", @lumiSummary);
239 }
240 if ($release)
241 {
242 my $query = "select id from ntupleRelease where name='$release'";
243 $db->selectdb ("ntuple");
244 my $results = $db->query ($query);
245 if ($results->numrows () != 1)
246 {
247 print "Ntuple release \"$release\" not found!\n";
248 exit;
249 }
250 }
251 ($nFiles, $size) = sizeOfDataset ($dataset, $location, $fileList);
252 my $fullLocation = $location;
253 $fullLocation = "$ENV{'PWD'}/$location" if !($location =~ m/^\//);
254
255 $dataset = addSlashes ($dataset);
256 $user = addSlashes ($user);
257 $format = addSlashes ($format);
258 $pset = addSlashes ($pset);
259 $crabCfg = addSlashes ($crabCfg);
260 $jsonFile = addSlashes ($jsonFile);
261 $lumiSummary = addSlashes ($lumiSummary);
262 $fullLocation = addSlashes ($fullLocation);
263 $fileList = addSlashes ($fileList);
264 $status = addSlashes ($status);
265 $comment = addSlashes ($comment);
266 $release = addSlashes ($release);
267
268 my $results;
269 my $query;
270 if ($id < 0)
271 {
272 $query = "select max(id) from ntuple";
273 $db->selectdb ("ntuple");
274 $results = $db->query ($query);
275 my @row = $results->fetchrow ();
276 my $id = 1;
277 $id = $row[0] + 1 if $results->numrows ();
278
279 $query = "insert into ntuple (id, dataset, creationTime, lastUpdateTime, lastUpdateUser, user, format, location, fileList, nFiles, sizeInGB, status, comment, pset, crabCfg, jsonFile, lumiSummary, version) values ($id, '$dataset', now(), now(), '$user', '$user', '$format', '$fullLocation', '$fileList', $nFiles, $size, '$status', '$comment', '$pset', '$crabCfg', '$jsonFile', '$lumiSummary', '$release')";
280 }
281 if ($id > 0)
282 {
283 my $fields;
284 my $values;
285
286 $values .= ", dataset='$dataset'" if $dataset;
287 $values .= ", lastUpdateTime=now()";
288 $values .= ", lastUpdateUser='$user'";
289 $values .= ", format='$format'" if $format;
290 $values .= ", location='$fullLocation'" if $location;
291 $values .= ", fileList='$fileList'" if $fileList;
292 $values .= ", nFiles=$nFiles" if $nFiles;
293 $values .= ", sizeInGB=$size" if $size;
294 $values .= ", status='$status'" if $status;
295 $values .= ", comment='$comment'" if $comment;
296 $values .= ", pset='$pset'" if $pset;
297 $values .= ", crabCfg='$crabCfg'" if $crabCfg;
298 $values .= ", jsonFile='$jsonFile'" if $jsonFile;
299 $values .= ", lumiSummary='$lumiSummary'" if $lumiSummary;
300 $values .= ", version='$release'" if $release;
301
302 $values =~ s/^, //;
303 $query = "update ntuple set $values where id=$id";
304 }
305 $results = $db->query ($query);
306
307 return $results;
308 }
309
310 sub
311 dbUpdateOther
312 {
313 my $id = shift;
314 my $comment = shift;
315 my $user = shift;
316
317 $comment = addSlashes ($comment);
318
319 my $query = "update other set lastUpdateTime=now(), lastUpdateUser='$user', comment='$comment' where id=$id";
320 $db->selectdb ("ntuple");
321 my $results = $db->query ($query);
322
323 return $results;
324 }
325
326 sub
327 dbDelete
328 {
329 my $id = shift;
330
331 my $query = "delete from ntuple where id=$id";
332 $db->selectdb ("ntuple");
333 my $results = $db->query ($query);
334 }
335
336 sub
337 dbDeleteOther
338 {
339 my $id = shift;
340
341 my $query = "delete from other where id=$id";
342 $db->selectdb ("ntuple");
343 my $results = $db->query ($query);
344 }
345
346 sub
347 printHelp
348 {
349 my $command = shift;
350
351 my $exeName = $0;
352 $exeName =~ s/^.*\/([^\/]*)$/$1/;
353
354 if ($command eq "create")
355 {
356 print "Usage: $exeName -l DIRECTORY [OPTION]... create NAME\n";
357 print "Creates an entry in the database for dataset NAME. The ntuples are assumed to\n";
358 print "be present on the Tier 3, with the status being set accordingly.\n";
359 print "\n";
360 print "Mandatory arguments to long options are mandatory for short options too.\n";
361 printf "%-29s%s\n", " -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs";
362 printf "%-29s%s\n", " -c, --comment COMMENT", "comment for the database entry";
363 printf "%-29s%s\n", " -f, --format FORMAT", "ntuple format (default: BEAN)";
364 printf "%-29s%s\n", " -j, --jsonFile FILE", "JSON file used for this dataset";
365 printf "%-29s%s\n", " -l, --location DIRECTORY", "directory containing the ntuples";
366 printf "%-29s%s\n", " -p, --pyConfig FILE", "Python config used to produce ntuples";
367 printf "%-29s%s\n", " -r, --release NAME", "ntuple release used to produce ntuples";
368 printf "%-29s%s\n", " -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB";
369 }
370 elsif ($command eq "createOther")
371 {
372 print "Usage: $exeName [OPTION]... createOther DIRECTORIES_AND_FILES\n";
373 print "Creates an entry in the database for non-ntuple data.\n";
374 print "\n";
375 print "Mandatory arguments to long options are mandatory for short options too.\n";
376 printf "%-29s%s\n", " -c, --comment COMMENT", "comment for the database entry";
377 }
378 elsif ($command eq "update")
379 {
380 print "Usage: $exeName [OPTION]... update NAME\n";
381 print "Updates an existing database entry for dataset NAME.\n";
382 print "\n";
383 print "Mandatory arguments to long options are mandatory for short options too.\n";
384 printf "%-29s%s\n", " -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs";
385 printf "%-29s%s\n", " -c, --comment COMMENT", "comment for the database entry";
386 printf "%-29s%s\n", " -f, --format FORMAT", "ntuple format";
387 printf "%-29s%s\n", " -j, --jsonFile FILE", "JSON file used for this dataset";
388 printf "%-29s%s\n", " -l, --location DIRECTORY", "directory containing the ntuples";
389 printf "%-29s%s\n", " -o, --other", "update an entry in the non-ntuple database";
390 printf "%-29s%s\n", " -p, --pyConfig FILE", "Python config used to produce ntuples";
391 printf "%-29s%s\n", " -r, --release NAME", "ntuple release used to produce ntuples";
392 printf "%-29s%s\n", " -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB";
393 }
394 elsif ($command eq "finish")
395 {
396 print "Usage: $exeName -l DIRECTORY finish NAME\n";
397
398 print "Finalizes the database entry for dataset NAME, changing its status to\n";
399 print "\"present\". This is intended to be the final step in command-line based ntuple\n";
400 print "production.\n";
401 print "\n";
402 print "Mandatory arguments to long options are mandatory for short options too.\n";
403 printf "%-29s%s\n", " -l, --location DIRECTORY", "directory containing the ntuples";
404 }
405 elsif ($command eq "deleteEntry")
406 {
407 print "Usage: $exeName [OPTION]... deleteEntry NAME\n";
408 print "Deletes the database entry for dataset NAME. This is intended primarily for\n";
409 print "mistaken database entries. If you wish to actually delete a set of ntuples,\n";
410 print "please use the \"deprecate\" command instead.\n";
411 print "\n";
412 printf "%-29s%s\n", " -o, --other", "update an entry in the non-ntuple database";
413 }
414 elsif ($command eq "deprecate")
415 {
416 print "Usage: $exeName deprecate NAME\n";
417 print "Marks the dataset NAME for deletion. WARNING: The dataset will be deleted from\n";
418 print "the Tier 3 within one week of this action. If you wish to simply delete a\n";
419 print "mistaken database entry, use the \"deleteEntry\" command instead.\n";
420 }
421 elsif ($command eq "uploadRelease")
422 {
423 print "Usage: $exeName -p FILE -r DIRECTORY [OPTION]... uploadRelease NAME\n";
424 print "Copies an ntuple release to the appropriate area on the Tier 3, and creates a\n";
425 print "database entry for it, with NAME being the name of the release. An ntuple\n";
426 print "release is a CMSSW release with all the necessary packages added for creating\n";
427 print "ntuples, along with a default Python config file.\n";
428 print "\n";
429 print "Mandatory arguments to long options are mandatory for short options too.\n";
430 printf "%-29s%s\n", " -c, --comment COMMENT", "comment for the database entry";
431 printf "%-29s%s\n", " -f, --format FORMAT", "ntuple format (default: BEAN)";
432 printf "%-29s%s\n", " -p, --pyConfig FILE", "default Python config for the release";
433 printf "%-29s%s\n", " -r, --release DIRECTORY", "CMSSW release containing ntuple packages";
434 }
435 else
436 {
437 print "Usage: $exeName [OPTION]... COMMAND NAME\n";
438 print "Manipulates entry in the OSU Tier 3 ntuple database given by NAME.\n";
439 print "\n";
440 print "Mandatory arguments to long options are mandatory for short options too.\n";
441 printf "%-29s%s\n", " -h, --help", "print help. If COMMAND is present, print help";
442 printf "%-29s%s\n", " ", "specific to COMMAND.";
443 print "\n";
444 print "COMMAND may be one of the following:\n";
445 printf "%-29s%s\n", " create", "creates the entry";
446 printf "%-29s%s\n", " create", "creates an entry for non-ntuple data";
447 printf "%-29s%s\n", " update", "updates the entry";
448 printf "%-29s%s\n", " finish", "finalizes the database entry";
449 printf "%-29s%s\n", " deleteEntry", "removes the database entry";
450 printf "%-29s%s\n", " deprecate", "marks the dataset for deletion";
451 printf "%-29s%s\n", " uploadRelease", "upload an ntuple release";
452 }
453
454 exit;
455 }
456
457 sub
458 sizeOfDataset
459 {
460 my $dataset = shift;
461 my $location = shift;
462 my $fileList = shift;
463
464 my $size = 0.0;
465 my $nFiles = 0;
466 if ($location && !$fileList)
467 {
468 if (!(-e $location))
469 {
470 print "$location does not exist!\n";
471 exit;
472 }
473 $nFiles = `ls $location | wc -l`;
474 $size = `du -s $location`;
475 $size =~ s/([^ ]*) .*/$1/;
476 $size /= 1024 * 1024;
477 $size = sprintf "%.2f", $size;
478 }
479 elsif ($fileList)
480 {
481 foreach my $file (split (/\n/, $fileList))
482 {
483 $nFiles++;
484 my $fileSize = `du -s $file`;
485 $fileSize =~ s/([^ ]*) .*/$1/;
486 $size += $fileSize;
487 }
488 $size /= 1024 * 1024;
489 $size = sprintf "%.2f", $size;
490 }
491
492 return ($nFiles, $size);
493 }
494
495 sub
496 uploadRelease
497 {
498 my $format = shift;
499 my $cmsswRelease = shift;
500 my $release = shift;
501 my $psetName = shift;
502 my $name = shift;
503 my $user = shift;
504 my $comment = shift;
505
506 move ($release, "/home/hart/public_html/releases/$name.tar.gz") or die "Ntuple releases may only be uploaded on the Tier 3, stopped";
507 if (!(-e $psetName))
508 {
509 print "$psetName does not exist!\n";
510 exit;
511 }
512 open (PY_CONFIG, "<$psetName");
513 my @pset = <PY_CONFIG>;
514 close (PY_CONFIG);
515 my $pset = join ("", @pset);
516
517 my $query = "select max(id) from ntupleRelease";
518 $db->selectdb ("ntuple");
519 my $results = $db->query ($query);
520 my @row = $results->fetchrow ();
521 my $id = 1;
522 $id = $row[0] + 1 if $results->numrows ();
523
524 $name = addSlashes ($name);
525 $pset = addSlashes ($pset);
526 $user = addSlashes ($user);
527 $format = addSlashes ($format);
528 $cmsswRelease = addSlashes ($cmsswRelease);
529 $comment = addSlashes ($comment);
530
531 my $query = "insert into ntupleRelease (id, name, pset, user, pending, format, cmsswRelease, comment) values ($id, '$name', '$pset', '$user', 1, '$format', '$cmsswRelease', '$comment')";
532 $db->selectdb ("ntuple");
533 my $results = $db->query ($query);
534 }
535
536 sub
537 dbOtherCreate
538 {
539 my $user = shift;
540 my $listOfFiles = shift;
541 my $comment = shift;
542
543 my $size = 0.0;
544 my $nFiles = 0.0;
545 for (my $i = 0; $i < @$listOfFiles; $i++)
546 {
547 if (!(-e $$listOfFiles[$i]))
548 {
549 print "$$listOfFiles[$i] does not exist!\n";
550 exit;
551 }
552 $nFiles += `ls -R $$listOfFiles[$i] | grep -v ':\$' | grep -v '^\$' | wc -l`;
553 my $fileSize = `du -s $$listOfFiles[$i]`;
554 $fileSize =~ s/([^ ]*) .*/$1/;
555 $fileSize /= 1024 * 1024;
556 $size += $fileSize;
557 $$listOfFiles[$i] = "$ENV{'PWD'}/$$listOfFiles[$i]" if !($$listOfFiles[$i] =~ m/^\//);
558 }
559 $size = sprintf "%.2f", $size;
560 my $location = join ("<br />", @$listOfFiles);
561
562 my $query = "select max(id) from other";
563 $db->selectdb ("ntuple");
564 my $results = $db->query ($query);
565 my @row = $results->fetchrow ();
566 my $id = 1;
567 $id = $row[0] + 1 if $results->numrows ();
568
569 $user = addSlashes ($user);
570 $location = addSlashes ($location);
571 $comment = addSlashes ($comment);
572
573 my $query = "insert into other (id, creationTime, lastUpdateTime, lastUpdateUser, user, location, nFiles, sizeInGB, comment) values ($id, now(), now(), '$user', '$user', '$location', $nFiles, $size, '$comment')";
574 $db->selectdb ("ntuple");
575 my $results = $db->query ($query);
576
577 print "ID $id\n";
578 }