ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osudb
(Generate patch)

Comparing UserCode/OSUT3Analysis/DBTools/scripts/osudb (file contents):
Revision 1.3 by ahart, Thu Jun 14 17:42:30 2012 UTC vs.
Revision 1.7 by ahart, Wed Aug 1 14:35:07 2012 UTC

# Line 3 | Line 3
3   use strict;
4   use Mysql;
5   use Getopt::Long;
6 + use File::Copy;
7  
8   sub getDataset;
9   sub dbUpdate;
10 + sub dbDelete;
11   sub addSlashes;
12 + sub uploadRelease;
13  
14 < our $db;
14 > our $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";
15  
16   my %opt;
17   Getopt::Long::Configure ("bundling");
18 < GetOptions (\%opt, "comment|c=s", "format|f=s", "pyConfig|p=s", "crabCfg|b=s", "jsonFile|j=s", "remoteLocation|r=s", "lumiSummary|s=s", "location|l=s", "help|h");
18 > GetOptions (\%opt, "comment|c=s", "format|f=s", "pyConfig|p=s", "crabCfg|b=s", "jsonFile|j=s", "lumiSummary|s=s", "location|l=s", "fileList|t=s", "release|r=s", "help|h");
19   my $argc = @ARGV;
20  
21 < printHelp () if $opt{"help"};
21 > printHelp ($ARGV[0]) if $opt{"help"};
22   printHelp () if $argc != 2;
23 < printHelp () if $ARGV[0] != "create" && $ARGV[0] != "update" && $ARGV[0] != "finish" && $ARGV[0] != "deprecate";
24 < my $id = -1;
25 < $id = getDataset ($ARGV[1]) if $ARGV[0] ne "create";
26 < $ARGV[0] = "create" if $id < 0;
27 < my $status = "present";
28 < $status = "" if $ARGV[0] eq "update";
29 < $status = "deprecated" if $ARGV[0] eq "deprecate";
30 < $opt{"format"} = "BEAN" if $ARGV[0] eq "create" && !$opt{"format"};
31 < dbUpdate ($id, $ARGV[1], $ENV{"USER"}, $opt{"format"}, $opt{"location"}, $opt{"remoteLocation"}, $status, $opt{"comment"}, $opt{"pyConfig"}, $opt{"crabCfg"}, $opt{"jsonFile"}, $opt{"lumiSummary"});
23 > printHelp () if $ARGV[0] ne "create" && $ARGV[0] ne "update" && $ARGV[0] ne "finish" && $ARGV[0] ne "deprecate" && $ARGV[0] ne "uploadRelease" && $ARGV[0] ne "deleteEntry";
24 > if (($ARGV[0] eq "create" || $ARGV[0] eq "finish") && !$opt{"location"})
25 >  {
26 >    print "The directory containing the ntuples must be given!\n";
27 >    exit;
28 >  }
29 > if ($ARGV[0] eq "uploadRelease" && (!$opt{"pyConfig"} || !$opt{"release"}))  # uploadRelease needs BOTH -p and -r; bail out if either one is missing
30 >  {
31 >    print "Both the Python and the CMSSW release must be given!\n";
32 >    exit;
33 >  }
34 > if ($ARGV[0] ne "uploadRelease")
35 >  {
36 >    my $id = -1;
37 >    my $fullDataset;
38 >    ($id, $fullDataset) = getDataset ($ARGV[0], $ARGV[1]) if $ARGV[0] ne "create";
39 >    $fullDataset = $ARGV[1] if $id < 0;
40 >    $ARGV[0] = "create" if $id < 0;
41 >    my $status = "present";
42 >    $status = "" if $ARGV[0] eq "update";
43 >    $status = "deprecated" if $ARGV[0] eq "deprecate";
44 >    $opt{"format"} = "BEAN" if $ARGV[0] eq "create" && !$opt{"format"};
45 >    dbUpdate ($id, $fullDataset, $ENV{"USER"}, $opt{"format"}, $opt{"location"}, $opt{"fileList"}, $status, $opt{"comment"}, $opt{"pyConfig"}, $opt{"crabCfg"}, $opt{"jsonFile"}, $opt{"lumiSummary"}, $opt{"release"});
46 >    dbDelete ($id) if $ARGV[0] eq "deleteEntry";
47 >  }
48 > else
49 >  {
50 >    my $parentDir = "./$opt{'release'}";
51 >    $parentDir =~ s/CMSSW_[^\/]*//g;
52 >    $parentDir =~ s/\/\/*/\//g;
53 >    my $cmsswRelease = $opt{'release'};
54 >    $cmsswRelease =~ s/^.*CMSSW_([^\/]*).*$/CMSSW_$1/;
55 >    `tar -C $parentDir -czf $ARGV[1].tar.gz $opt{"release"}`;
56 >    $opt{"format"} = "BEAN" if !$opt{"format"};
57 >    uploadRelease ($opt{"format"}, $cmsswRelease, "$ARGV[1].tar.gz", $opt{"pyConfig"}, $ARGV[1], $ENV{"USER"}, $opt{"comment"});
58 >  }
59  
60   sub
61   addSlashes
# Line 43 | Line 73 | addSlashes
73   sub
74   getDataset
75   {
76 +  my $command = shift;
77    my $dataset = shift;
78  
79    my $results;
80    my $queryDataset = $dataset;
81    $queryDataset =~ s/\*/%/g;
82    $queryDataset =~ s/(.*)/%$1%/g;
52 -  $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User");
83    my $query = "select id,dataset,user,creationTime from ntuple where dataset like '$queryDataset' order by lastUpdateTime";
84    $db->selectdb ("ntuple");
85    $results = $db->query ($query);
86    if ($results->numrows () == 1)
87      {
88        my @row = $results->fetchrow ();
89 <      return $row[0];
89 >      return ($row[0], $row[1]);
90      }
91    if ($results->numrows () == 0)
92      {
93 <      print "Database entry does not exist. Create it? (Y/n): ";
94 <      my $response = <STDIN>;
95 <      $response =~ s/\n//g;
96 <      $response = "y" if !$response;
97 <      exit if substr (lc ($response), 0, 1) ne 'y';
98 <      return -1;
93 >      if ($command ne "deleteEntry" && $command ne "deprecate")
94 >        {
95 >          print "Database entry does not exist. Create it? (Y/n): ";
96 >          my $response = <STDIN>;
97 >          $response =~ s/\n//g;
98 >          $response = "y" if !$response;
99 >          exit if substr (lc ($response), 0, 1) ne 'y';
100 >          return (-1, "");
101 >        }
102 >      else
103 >        {
104 >          print "Database entry does not exist.\n";
105 >          exit;
106 >        }
107      }
108 <  my %map;
108 >  my %id;
109 >  my %fullDataset;
110    print "Found multiple database entries matching\n";
111    print "\"$dataset\":\n";
112 <  print "( 0) new\n";
112 >  print "( 0) new\n" if $command ne "deleteEntry" && $command ne "deprecate";
113    for (my $i = 1; $i <= $results->numrows (); $i++)
114      {
115        my @row = $results->fetchrow ();
116 <      $map{"$i"} = $row[0];
116 >      $id{"$i"} = $row[0];
117 >      $fullDataset{"$i"} = $row[1];
118        printf "(%2d) $row[1]\n", $i;
119        print "     created by $row[2] on $row[3]\n";
120      }
121 <  print "\nWhich entry would you like to update? (Select 0 to create a new entry): ";
121 >  print "\nWhich entry would you like to modify?";
122 >  if ($command ne "deleteEntry" && $command ne "deprecate")
123 >    {
124 >      print " (Select 0 to create a new entry): "
125 >    }
126 >  else
127 >    {
128 >      print ": ";
129 >    }
130    my $response = <STDIN>;
131    $response =~ s/[ \t\n]//g;
132 <  return -1 if $response == 0;
133 <  if (!(exists $map{$response}))
132 >  return (-1, "") if $response == 0 && $command ne "deleteEntry" && $command ne "deprecate";
133 >  if (!(exists $id{$response}))
134      {
135        print "Your selection \"$response\" was not a valid option! Quitting.\n";
136        exit;
137      }
138  
139 <  return $map{$response};
139 >  return ($id{$response}, $fullDataset{$response});
140   }
141  
142   sub
# Line 99 | Line 147 | dbUpdate
147    my $user = shift;
148    my $format = shift;
149    my $location = shift;
150 <  my $remoteLocation = shift;
150 >  my $fileListName = shift;
151    my $status = shift;
152    my $comment = shift;
153    my $psetName = shift;
154    my $crabCfgName = shift;
155    my $jsonFileName = shift;
156    my $lumiSummaryName = shift;
157 +  my $release = shift;
158  
159 +  my $fileList;
160    my $pset;
161    my $crabCfg;
162    my $jsonFile;
163    my $lumiSummary;
164    my $size = 0;
165    my $nFiles = 0;
166 +  if ($fileListName)
167 +    {
168 +      if (!(-e $fileListName))
169 +        {
170 +          print "$fileListName does not exist!\n";
171 +          exit;
172 +        }
173 +      open (FILE_LIST, "<$fileListName");
174 +      my @fileList = <FILE_LIST>;
175 +      close (FILE_LIST);
176 +      $fileList = join ("", @fileList);
177 +    }
178    if ($psetName)
179      {
180        if (!(-e $psetName))
# Line 161 | Line 223 | dbUpdate
223        close (LUMI_SUMMARY);
224        $lumiSummary = join ("", @lumiSummary);
225      }
226 <  if ($location)
227 <  {
228 <    if (!(-e $location))
229 <      {
230 <        print "$location does not exist!\n";
231 <        exit;
232 <      }
233 <     $nFiles = `ls $location | wc -l`;
234 <     $size = `du -s $location`;
235 <    $size =~ s/([^ ]*) .*/$1/;
236 <    $size /= 1024 * 1024;
237 <    $size = sprintf "%.2f", $size;
238 <  }
226 >  if ($release)
227 >    {
228 >      my $query = "select id from ntupleRelease where name='$release'";
229 >      $db->selectdb ("ntuple");
230 >      my $results = $db->query ($query);
231 >      if ($results->numrows () != 1)
232 >        {
233 >          print "Ntuple release \"$release\" not found!\n";
234 >          exit;
235 >        }
236 >    }
237 >  ($nFiles, $size) = sizeOfDataset ($dataset, $location, $fileList);
238 >  my $fullLocation = $location;
239 >  $fullLocation = "$ENV{'PWD'}/$location" if !($location =~ m/^\//);
240  
241    $dataset = addSlashes ($dataset);
242    $user = addSlashes ($user);
# Line 182 | Line 245 | dbUpdate
245    $crabCfg = addSlashes ($crabCfg);
246    $jsonFile = addSlashes ($jsonFile);
247    $lumiSummary = addSlashes ($lumiSummary);
248 <  $location = addSlashes ($location);
249 <  $remoteLocation = addSlashes ($remoteLocation);
248 >  $fullLocation = addSlashes ($fullLocation);
249 >  $fileList = addSlashes ($fileList);
250    $status = addSlashes ($status);
251    $comment = addSlashes ($comment);
252 +  $release = addSlashes ($release);
253  
254    my $results;
255    my $query;
256    if ($id < 0)
257      {
194 -      $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User");
258        $query = "select max(id) from ntuple";
259        $db->selectdb ("ntuple");
260        $results = $db->query ($query);
# Line 199 | Line 262 | dbUpdate
262        my $id = 1;
263        $id = $row[0] + 1 if $results->numrows ();
264  
265 <      $query = "insert into ntuple (id, dataset, creationTime, lastUpdateTime, user, format, location, remoteLocation, nFiles, sizeInGB, status, comment, pset, crabCfg, jsonFile, lumiSummary) values ($id, '$dataset', now(), now(), '$user', '$format', '$location', '$remoteLocation', $nFiles, $size, '$status', '$comment', '$pset', '$crabCfg', '$jsonFile', '$lumiSummary')";
265 >      $query = "insert into ntuple (id, dataset, creationTime, lastUpdateTime, user, format, location, fileList, nFiles, sizeInGB, status, comment, pset, crabCfg, jsonFile, lumiSummary, version) values ($id, '$dataset', now(), now(), '$user', '$format', '$fullLocation', '$fileList', $nFiles, $size, '$status', '$comment', '$pset', '$crabCfg', '$jsonFile', '$lumiSummary', '$release')";
266      }
267    if ($id > 0)
268      {
# Line 209 | Line 272 | dbUpdate
272        $values .= ", dataset='$dataset'" if $dataset;
273        $values .= ", lastUpdateTime=now()";
274        $values .= ", format='$format'" if $format;
275 <      $values .= ", location='$location'" if $location;
276 <      $values .= ", remoteLocation='$remoteLocation'" if $remoteLocation;
277 <      $values .= ", nFiles=$nFiles" if $location;
278 <      $values .= ", sizeInGB=$size" if $location;
275 >      $values .= ", location='$fullLocation'" if $location;
276 >      $values .= ", fileList='$fileList'" if $fileList;
277 >      $values .= ", nFiles=$nFiles" if $nFiles;
278 >      $values .= ", sizeInGB=$size" if $size;
279        $values .= ", status='$status'" if $status;
280        $values .= ", comment='$comment'" if $comment;
281        $values .= ", pset='$pset'" if $pset;
282        $values .= ", crabCfg='$crabCfg'" if $crabCfg;
283        $values .= ", jsonFile='$jsonFile'" if $jsonFile;
284        $values .= ", lumiSummary='$lumiSummary'" if $lumiSummary;
285 +      $values .= ", version='$release'" if $release;
286  
287        $values =~ s/^, //;
288        $query = "update ntuple set $values where id=$id";
# Line 229 | Line 293 | dbUpdate
293   }
294  
295   sub  # dbDelete(id): delete one row from the "ntuple" table, selected by its id.
296 + dbDelete
297 + {
298 +  my $id = shift;  # id of the row to delete
299 +
300 +  return if $id < 0;  # negative id: nothing to delete (getDataset returns -1 when no existing entry was selected)
301 +  my $query = "delete from ntuple where id=$id";  # NOTE(review): $id is interpolated unescaped; the visible caller passes an id read back from the database
302 +  $db->selectdb ("ntuple");  # $db is the file-level Mysql connection
303 +  my $results = $db->query ($query);  # result is stored but never inspected, so a failed delete is silent
304 + }
305 +
306 + sub
307   printHelp
308   {
309 +  my $command = shift;
310 +
311    my $exeName = $0;
312    $exeName =~ s/^.*\/([^\/]*)$/$1/;
313  
314 <  print "Usage: $exeName [OPTION]... COMMAND DATASET\n";
315 <  print "Manipulates entries in the OSU Tier 3 ntuple database.\n";
316 <  print "\n";
317 <  print "Mandatory arguments to long options are mandatory for short options too.\n";
318 <  printf "%-29s%s\n", "  -b, --crabCfg FILE", "CRAB config file used to submit the ntuple jobs";
319 <  printf "%-29s%s\n", "  -c, --comment COMMENT", "comment for this database entry";
320 <  printf "%-29s%s\n", "  -f, --format FORMAT", "ntuple format (default: BEAN)";
321 <  printf "%-29s%s\n", "  -j, --jsonFile FILE", "JSON file used to ntuplize the dataset";
322 <  printf "%-29s%s\n", "  -h, --help", "print this help message";
323 <  printf "%-29s%s\n", "  -l, --location DIRECTORY", "location on the Tier 3";
324 <  printf "%-29s%s\n", "  -p, --pyConfig FILE", "python config used to ntuplize the dataset";
325 <  printf "%-29s%s\n", "  -s, --lumiSummary FILE", "lumiSummary.json returned by CRAB";
326 <  print "\n";
327 <  print "COMMAND may be one of the following:\n";
328 <  printf "%-29s%s\n", "  create", "creates the entry (assumes the dataset is present)";
329 <  printf "%-29s%s\n", "  update", "updates the entry";
330 <  printf "%-29s%s\n", "  finish", "finalizes the database entry";
331 <  printf "%-29s%s\n", "  deprecate", "marks the dataset for deletion";
314 >  if ($command eq "create")
315 >    {
316 >      print "Usage: $exeName -l DIRECTORY [OPTION]... create NAME\n";
317 >      print "Creates an entry in the database for dataset NAME. The ntuples are assumed to\n";
318 >      print "be present on the Tier 3, with the status being set accordingly.\n";
319 >      print "\n";
320 >      print "Mandatory arguments to long options are mandatory for short options too.\n";
321 >      printf "%-29s%s\n", "  -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs";
322 >      printf "%-29s%s\n", "  -c, --comment COMMENT", "comment for the database entry";
323 >      printf "%-29s%s\n", "  -f, --format FORMAT", "ntuple format (default: BEAN)";
324 >      printf "%-29s%s\n", "  -j, --jsonFile FILE", "JSON file used for this dataset";
325 >      printf "%-29s%s\n", "  -l, --location DIRECTORY", "directory containing the ntuples";
326 >      printf "%-29s%s\n", "  -p, --pyConfig FILE", "Python config used to produce ntuples";
327 >      printf "%-29s%s\n", "  -r, --release NAME", "ntuple release used to produce ntuples";
328 >      printf "%-29s%s\n", "  -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB";
329 >    }
330 >  elsif ($command eq "update")
331 >    {
332 >      print "Usage: $exeName [OPTION]... update NAME\n";
333 >      print "Updates an existing database entry for dataset NAME.\n";
334 >      print "\n";
335 >      print "Mandatory arguments to long options are mandatory for short options too.\n";
336 >      printf "%-29s%s\n", "  -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs";
337 >      printf "%-29s%s\n", "  -c, --comment COMMENT", "comment for the database entry";
338 >      printf "%-29s%s\n", "  -f, --format FORMAT", "ntuple format";
339 >      printf "%-29s%s\n", "  -j, --jsonFile FILE", "JSON file used for this dataset";
340 >      printf "%-29s%s\n", "  -l, --location DIRECTORY", "directory containing the ntuples";
341 >      printf "%-29s%s\n", "  -p, --pyConfig FILE", "Python config used to produce ntuples";
342 >      printf "%-29s%s\n", "  -r, --release NAME", "ntuple release used to produce ntuples";
343 >      printf "%-29s%s\n", "  -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB";
344 >    }
345 >  elsif ($command eq "finish")
346 >    {
347 >      print "Usage: $exeName -l DIRECTORY finish NAME\n";
348 >
349 >      print "Finalizes the database entry for dataset NAME, changing its status to\n";
350 >      print "\"present\".  This is intended to be the final step in command-line based ntuple\n";
351 >      print "production.\n";
352 >      print "\n";
353 >      print "Mandatory arguments to long options are mandatory for short options too.\n";
354 >      printf "%-29s%s\n", "  -l, --location DIRECTORY", "directory containing the ntuples";
355 >    }
356 >  elsif ($command eq "deleteEntry")
357 >    {
358 >      print "Usage: $exeName deleteEntry NAME\n";
359 >      print "Deletes the database entry for dataset NAME. This is intended primarily for\n";
360 >      print "mistaken database entries. If you wish to actually delete a set of ntuples,\n";
361 >      print "please use the \"deprecate\" command instead.\n";
362 >    }
363 >  elsif ($command eq "deprecate")
364 >    {
365 >      print "Usage: $exeName deprecate NAME\n";
366 >      print "Marks the dataset NAME for deletion. WARNING: The dataset will be deleted from\n";
367 >      print "the Tier 3 within one week of this action. If you wish to simply delete a\n";
368 >      print "mistaken database entry, use the \"deleteEntry\" command instead.\n";
369 >    }
370 >  elsif ($command eq "uploadRelease")
371 >    {
372 >      print "Usage: $exeName -p FILE -r DIRECTORY [OPTION]... uploadRelease NAME\n";
373 >      print "Copies an ntuple release to the appropriate area on the Tier 3, and creates a\n";
374 >      print "database entry for it, with NAME being the name of the release. An ntuple\n";
375 >      print "release is a CMSSW release with all the necessary packages added for creating\n";
376 >      print "ntuples, along with a default Python config file.\n";
377 >      print "\n";
378 >      print "Mandatory arguments to long options are mandatory for short options too.\n";
379 >      printf "%-29s%s\n", "  -c, --comment COMMENT", "comment for the database entry";
380 >      printf "%-29s%s\n", "  -f, --format FORMAT", "ntuple format (default: BEAN)";
381 >      printf "%-29s%s\n", "  -p, --pyConfig FILE", "default Python config for the release";
382 >      printf "%-29s%s\n", "  -r, --release DIRECTORY", "CMSSW release containing ntuple packages";
383 >    }
384 >  else
385 >    {
386 >      print "Usage: $exeName [OPTION]... COMMAND NAME\n";
387 >      print "Manipulates entry in the OSU Tier 3 ntuple database given by NAME.\n";
388 >      print "\n";
389 >      print "Mandatory arguments to long options are mandatory for short options too.\n";
390 >      printf "%-29s%s\n", "  -h, --help", "print help. If COMMAND is present, print help";
391 >      printf "%-29s%s\n", " ", "specific to COMMAND.";
392 >      print "\n";
393 >      print "COMMAND may be one of the following:\n";
394 >      printf "%-29s%s\n", "  create", "creates the entry";
395 >      printf "%-29s%s\n", "  update", "updates the entry";
396 >      printf "%-29s%s\n", "  finish", "finalizes the database entry";
397 >      printf "%-29s%s\n", "  deleteEntry", "removes the database entry";
398 >      printf "%-29s%s\n", "  deprecate", "marks the dataset for deletion";
399 >      printf "%-29s%s\n", "  uploadRelease", "upload an ntuple release";
400 >    }
401  
402    exit;
403   }
404 +
405 + sub  # sizeOfDataset(dataset, location, fileList): count the files of a dataset and measure their disk usage.
406 + sizeOfDataset  # Returns the list (nFiles, size); both stay 0 when neither a location nor a file list is given.
407 + {
408 +  my $dataset = shift;  # NOTE(review): accepted but never used in this sub
409 +  my $location = shift;  # directory holding the ntuples; only consulted when no file list is given
410 +  my $fileList = shift;  # newline-separated file paths (contents of the list, not its file name)
411 +
412 +  my $size = 0.0;
413 +  my $nFiles = 0;
414 +  if ($location && !$fileList)
415 +    {
416 +      if (!(-e $location))
417 +        {
418 +          print "$location does not exist!\n";
419 +          exit;
420 +        }
421 +      $nFiles = `ls $location | wc -l`;  # NOTE(review): $location goes to the shell unquoted; backtick output keeps its trailing newline
422 +      $size = `du -s $location`;
423 +      $size =~ s/([^ ]*) .*/$1/;  # keep the text before the first space; NOTE(review): du usually separates fields with a tab, in which case this is a no-op and the division below relies on the leading digits - confirm
424 +      $size /= 1024 * 1024;  # presumably converts 1 KiB du blocks to GiB (the value is stored as sizeInGB by dbUpdate) - confirm du's block size
425 +      $size = sprintf "%.2f", $size;
426 +    }
427 +  elsif ($fileList)
428 +    {
429 +      foreach my $file (split (/\n/, $fileList))  # one path per line
430 +        {
431 +          $nFiles++;
432 +          my $fileSize = `du -s $file`;  # NOTE(review): path is unquoted, and a missing file simply contributes 0
433 +          $fileSize =~ s/([^ ]*) .*/$1/;
434 +          $size += $fileSize;
435 +        }
436 +      $size /= 1024 * 1024;  # same unit conversion as the directory branch
437 +      $size = sprintf "%.2f", $size;
438 +    }
439 +
440 +  return ($nFiles, $size);
441 + }
442 +
443 + sub  # uploadRelease: move a release tarball into the releases area and insert a row for it into ntupleRelease.
444 + uploadRelease  # Arguments, in order: format, cmsswRelease, release, psetName, name, user, comment.
445 + {
446 +  my $format = shift;
447 +  my $cmsswRelease = shift;
448 +  my $release = shift;  # path of the tarball that is moved below
449 +  my $psetName = shift;  # path of the Python config whose contents go into the pset column
450 +  my $name = shift;  # release name; also names the destination tarball
451 +  my $user = shift;
452 +  my $comment = shift;
453 +
454 +  move ($release, "/home/hart/public_html/releases/$name.tar.gz") or die "Ntuple releases may only be uploaded on the Tier 3, stopped";  # move() is File::Copy's; dies when the tarball cannot be moved
455 +  if (!(-e $psetName))  # NOTE(review): this check runs after the move, so a missing config exits with the tarball already relocated and no database row
456 +    {
457 +      print "$psetName does not exist!\n";
458 +      exit;
459 +    }
460 +  open (PY_CONFIG, "<$psetName");  # NOTE(review): 2-arg open, return value unchecked
461 +  my @pset = <PY_CONFIG>;
462 +  close (PY_CONFIG);
463 +  my $pset = join ("", @pset);  # whole config file as a single string
464 +
465 +  my $query = "select max(id) from ntupleRelease";  # the next id is taken as max(id) + 1
466 +  $db->selectdb ("ntuple");
467 +  my $results = $db->query ($query);
468 +  my @row = $results->fetchrow ();
469 +  my $id = 1;
470 +  $id = $row[0] + 1 if $results->numrows ();
471 +
472 +  $name = addSlashes ($name);  # addSlashes is defined elsewhere in this file (body not visible here); presumably SQL escaping - confirm
473 +  $pset = addSlashes ($pset);
474 +  $user = addSlashes ($user);
475 +  $format = addSlashes ($format);
476 +  $cmsswRelease = addSlashes ($cmsswRelease);
477 +  $comment = addSlashes ($comment);
478 +
479 +  my $query = "insert into ntupleRelease (id, name, pset, user, pending, format, cmsswRelease, comment) values ($id, '$name', '$pset', '$user', 1, '$format', '$cmsswRelease', '$comment')";  # row is inserted with pending = 1; NOTE(review): this second "my $query" masks the declaration above
480 +  $db->selectdb ("ntuple");
481 +  my $results = $db->query ($query);  # NOTE(review): redeclares $results; the insert result is never checked
482 + }

Diff Legend

- Removed lines
+ Added lines
< Changed lines
> Changed lines