ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osudb
Revision: 1.4
Committed: Tue Jul 31 14:12:19 2012 UTC (12 years, 9 months ago) by ahart
Branch: MAIN
Changes since 1.3: +271 -60 lines
Log Message:
Add database entry deletion and release upload, connect to the database once, and add error checking.

File Contents

# User Rev Content
1 ahart 1.1 #!/usr/bin/env perl
2    
3     use strict;
4     use Mysql;
5     use Getopt::Long;
6 ahart 1.4 use File::Copy;
7 ahart 1.1
8     sub getDataset;
9     sub dbUpdate;
10 ahart 1.4 sub dbDelete;
11 ahart 1.1 sub addSlashes;
12 ahart 1.4 sub uploadRelease;
13 ahart 1.1
14 ahart 1.4 our $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";
15 ahart 1.1
16     my %opt;
17     Getopt::Long::Configure ("bundling");
18 ahart 1.4 GetOptions (\%opt, "comment|c=s", "format|f=s", "pyConfig|p=s", "crabCfg|b=s", "jsonFile|j=s", "lumiSummary|s=s", "location|l=s", "fileList|t=s", "release|r=s", "help|h");
19 ahart 1.1 my $argc = @ARGV;
20    
21 ahart 1.4 printHelp ($ARGV[0]) if $opt{"help"};
22 ahart 1.1 printHelp () if $argc != 2;
23 ahart 1.4 printHelp () if $ARGV[0] ne "create" && $ARGV[0] ne "update" && $ARGV[0] ne "finish" && $ARGV[0] ne "deprecate" && $ARGV[0] ne "uploadRelease" && $ARGV[0] ne "deleteEntry";
24     if (($ARGV[0] eq "create" || $ARGV[0] eq "finish") && !$opt{"location"})
25     {
26     print "The directory containing the ntuples must be given!\n";
27     exit;
28     }
29     if ($ARGV[0] eq "uploadRelease" && !$opt{"pyConfig"} && !$opt{"release"})
30     {
31     print "Both the Python and the CMSSW release must be given!\n";
32     exit;
33     }
34     if ($ARGV[0] ne "uploadRelease")
35     {
36     my $id = -1;
37     $id = getDataset ($ARGV[0], $ARGV[1]) if $ARGV[0] ne "create";
38     $ARGV[0] = "create" if $id < 0;
39     my $status = "present";
40     $status = "" if $ARGV[0] eq "update";
41     $status = "deprecated" if $ARGV[0] eq "deprecate";
42     $opt{"format"} = "BEAN" if $ARGV[0] eq "create" && !$opt{"format"};
43     $opt{"location"} = "$ENV{'PWD'}/$opt{'location'}" if !($opt{"location"} =~ m/^\//);
44     dbUpdate ($id, $ARGV[1], $ENV{"USER"}, $opt{"format"}, $opt{"location"}, $opt{"fileList"}, $status, $opt{"comment"}, $opt{"pyConfig"}, $opt{"crabCfg"}, $opt{"jsonFile"}, $opt{"lumiSummary"});
45     dbDelete ($id) if $ARGV[0] eq "deleteEntry";
46     }
47     else
48     {
49     my $parentDir = "./$opt{'release'}";
50     $parentDir =~ s/CMSSW_[^\/]*//g;
51     $parentDir =~ s/\/\/*/\//g;
52     my $cmsswRelease = $opt{'release'};
53     $cmsswRelease =~ s/^.*CMSSW_([^\/]*).*$/CMSSW_$1/;
54     `tar -C $parentDir -czf $ARGV[1].tar.gz $opt{"release"}`;
55     $opt{"format"} = "BEAN" if !$opt{"format"};
56     uploadRelease ($opt{"format"}, $cmsswRelease, "$ARGV[1].tar.gz", $opt{"pyConfig"}, $ARGV[1], $ENV{"USER"}, $opt{"comment"});
57     }
58 ahart 1.1
59     sub
60     addSlashes
61 ahart 1.2 {
62 ahart 1.1 my $string = shift;
63    
64     $string =~ s/\\/\\\\/g;
65     $string =~ s/'/\\'/g;
66     $string =~ s/"/\\"/g;
67     $string =~ s/\\0/\\\\0/g;
68    
69     return $string;
70     }
71    
72     sub
73     getDataset
74     {
75 ahart 1.4 my $command = shift;
76 ahart 1.1 my $dataset = shift;
77    
78     my $results;
79 ahart 1.3 my $queryDataset = $dataset;
80     $queryDataset =~ s/\*/%/g;
81     $queryDataset =~ s/(.*)/%$1%/g;
82     my $query = "select id,dataset,user,creationTime from ntuple where dataset like '$queryDataset' order by lastUpdateTime";
83 ahart 1.1 $db->selectdb ("ntuple");
84     $results = $db->query ($query);
85     if ($results->numrows () == 1)
86     {
87     my @row = $results->fetchrow ();
88     return $row[0];
89     }
90     if ($results->numrows () == 0)
91     {
92 ahart 1.4 if ($command ne "deleteEntry" && $command ne "deprecate")
93     {
94     print "Database entry does not exist. Create it? (Y/n): ";
95     my $response = <STDIN>;
96     $response =~ s/\n//g;
97     $response = "y" if !$response;
98     exit if substr (lc ($response), 0, 1) ne 'y';
99     return -1;
100     }
101     else
102     {
103     print "Database entry does not exist.\n";
104     exit;
105     }
106 ahart 1.1 }
107     my %map;
108     print "Found multiple database entries matching\n";
109     print "\"$dataset\":\n";
110 ahart 1.4 print "( 0) new\n" if $command ne "deleteEntry" && $command ne "deprecate";
111 ahart 1.1 for (my $i = 1; $i <= $results->numrows (); $i++)
112     {
113     my @row = $results->fetchrow ();
114     $map{"$i"} = $row[0];
115 ahart 1.3 printf "(%2d) $row[1]\n", $i;
116     print " created by $row[2] on $row[3]\n";
117 ahart 1.1 }
118 ahart 1.4 print "\nWhich entry would you like to modify?";
119     if ($command ne "deleteEntry" && $command ne "deprecate")
120     {
121     print " (Select 0 to create a new entry): "
122     }
123     else
124     {
125     print ": ";
126     }
127 ahart 1.1 my $response = <STDIN>;
128     $response =~ s/[ \t\n]//g;
129 ahart 1.4 return -1 if $response == 0 && $command ne "deleteEntry" && $command ne "deprecate";
130 ahart 1.1 if (!(exists $map{$response}))
131     {
132     print "Your selection \"$response\" was not a valid option! Quitting.\n";
133     exit;
134     }
135    
136     return $map{$response};
137     }
138    
139     sub
140     dbUpdate
141     {
142     my $id = shift;
143     my $dataset = shift;
144     my $user = shift;
145     my $format = shift;
146     my $location = shift;
147 ahart 1.4 my $fileListName = shift;
148 ahart 1.1 my $status = shift;
149     my $comment = shift;
150     my $psetName = shift;
151     my $crabCfgName = shift;
152     my $jsonFileName = shift;
153     my $lumiSummaryName = shift;
154    
155 ahart 1.4 my $fileList;
156 ahart 1.1 my $pset;
157     my $crabCfg;
158     my $jsonFile;
159     my $lumiSummary;
160     my $size = 0;
161     my $nFiles = 0;
162 ahart 1.4 if ($fileListName)
163     {
164     if (!(-e $fileListName))
165     {
166     print "$fileListName does not exist!\n";
167     exit;
168     }
169     open (FILE_LIST, "<$fileListName");
170     my @fileList = <FILE_LIST>;
171     close (FILE_LIST);
172     $fileList = join ("", @fileList);
173     }
174 ahart 1.1 if ($psetName)
175     {
176     if (!(-e $psetName))
177     {
178     print "$psetName does not exist!\n";
179     exit;
180     }
181     open (PY_CONFIG, "<$psetName");
182     my @pset = <PY_CONFIG>;
183     close (PY_CONFIG);
184     $pset = join ("", @pset);
185     }
186     if ($crabCfgName)
187     {
188     if (!(-e $crabCfgName))
189     {
190     print "$crabCfgName does not exist!\n";
191     exit;
192     }
193     open (CRAB_CONFIG, "<$crabCfgName");
194     my @crabCfg = <CRAB_CONFIG>;
195     close (CRAB_CONFIG);
196     $crabCfg = join ("", @crabCfg);
197     }
198     if ($jsonFileName)
199     {
200     if (!(-e $jsonFileName))
201     {
202     print "$jsonFileName does not exist!\n";
203     exit;
204     }
205     open (JSON_FILE, "<$jsonFileName");
206     my @jsonFile = <JSON_FILE>;
207     close (JSON_FILE);
208     $jsonFile = join ("", @jsonFile);
209     }
210     if ($lumiSummaryName)
211     {
212     if (!(-e $lumiSummaryName))
213     {
214     print "$lumiSummaryName does not exist!\n";
215     exit;
216     }
217     open (LUMI_SUMMARY, "<$lumiSummaryName");
218     my @lumiSummary = <LUMI_SUMMARY>;
219     close (LUMI_SUMMARY);
220     $lumiSummary = join ("", @lumiSummary);
221     }
222 ahart 1.4 ($nFiles, $size) = sizeOfDataset ($dataset, $location, $fileList);
223 ahart 1.1
224     $dataset = addSlashes ($dataset);
225     $user = addSlashes ($user);
226     $format = addSlashes ($format);
227     $pset = addSlashes ($pset);
228     $crabCfg = addSlashes ($crabCfg);
229     $jsonFile = addSlashes ($jsonFile);
230     $lumiSummary = addSlashes ($lumiSummary);
231     $location = addSlashes ($location);
232 ahart 1.4 $fileList = addSlashes ($fileList);
233 ahart 1.1 $status = addSlashes ($status);
234     $comment = addSlashes ($comment);
235    
236     my $results;
237     my $query;
238     if ($id < 0)
239     {
240     $query = "select max(id) from ntuple";
241     $db->selectdb ("ntuple");
242     $results = $db->query ($query);
243     my @row = $results->fetchrow ();
244     my $id = 1;
245     $id = $row[0] + 1 if $results->numrows ();
246    
247 ahart 1.4 $query = "insert into ntuple (id, dataset, creationTime, lastUpdateTime, user, format, location, fileList, nFiles, sizeInGB, status, comment, pset, crabCfg, jsonFile, lumiSummary) values ($id, '$dataset', now(), now(), '$user', '$format', '$location', '$fileList', $nFiles, $size, '$status', '$comment', '$pset', '$crabCfg', '$jsonFile', '$lumiSummary')";
248 ahart 1.1 }
249     if ($id > 0)
250     {
251     my $fields;
252     my $values;
253    
254     $values .= ", dataset='$dataset'" if $dataset;
255     $values .= ", lastUpdateTime=now()";
256     $values .= ", format='$format'" if $format;
257     $values .= ", location='$location'" if $location;
258 ahart 1.4 $values .= ", fileList='$fileList'" if $fileList;
259     $values .= ", nFiles=$nFiles" if $nFiles;
260     $values .= ", sizeInGB=$size" if $size;
261 ahart 1.1 $values .= ", status='$status'" if $status;
262     $values .= ", comment='$comment'" if $comment;
263     $values .= ", pset='$pset'" if $pset;
264     $values .= ", crabCfg='$crabCfg'" if $crabCfg;
265     $values .= ", jsonFile='$jsonFile'" if $jsonFile;
266     $values .= ", lumiSummary='$lumiSummary'" if $lumiSummary;
267    
268     $values =~ s/^, //;
269     $query = "update ntuple set $values where id=$id";
270     }
271     $results = $db->query ($query);
272    
273     return $results;
274     }
275    
276     sub
277 ahart 1.4 dbDelete
278     {
279     my $id = shift;
280    
281     return if $id < 0;
282     my $query = "delete from ntuple where id=$id";
283     $db->selectdb ("ntuple");
284     my $results = $db->query ($query);
285     }
286    
287     sub
288 ahart 1.1 printHelp
289     {
290 ahart 1.4 my $command = shift;
291    
292 ahart 1.1 my $exeName = $0;
293     $exeName =~ s/^.*\/([^\/]*)$/$1/;
294    
295 ahart 1.4 if ($command eq "create")
296     {
297     print "Usage: $exeName -l DIRECTORY [OPTION]... create NAME\n";
298     print "Creates an entry in the database for dataset NAME. The ntuples are assumed to\n";
299     print "be present on the Tier 3, with the status being set accordingly.\n";
300     print "\n";
301     print "Mandatory arguments to long options are mandatory for short options too.\n";
302     printf "%-29s%s\n", " -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs";
303     printf "%-29s%s\n", " -c, --comment COMMENT", "comment for the database entry";
304     printf "%-29s%s\n", " -f, --format FORMAT", "ntuple format (default: BEAN)";
305     printf "%-29s%s\n", " -j, --jsonFile FILE", "JSON file used for this dataset";
306     printf "%-29s%s\n", " -l, --location DIRECTORY", "directory containing the ntuples";
307     printf "%-29s%s\n", " -p, --pyConfig FILE", "Python config used to produce ntuples";
308     printf "%-29s%s\n", " -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB";
309     }
310     elsif ($command eq "update")
311     {
312     print "Usage: $exeName [OPTION]... update NAME\n";
313     print "Updates an existing database entry for dataset NAME.\n";
314     print "\n";
315     print "Mandatory arguments to long options are mandatory for short options too.\n";
316     printf "%-29s%s\n", " -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs";
317     printf "%-29s%s\n", " -c, --comment COMMENT", "comment for the database entry";
318     printf "%-29s%s\n", " -f, --format FORMAT", "ntuple format";
319     printf "%-29s%s\n", " -j, --jsonFile FILE", "JSON file used for this dataset";
320     printf "%-29s%s\n", " -l, --location DIRECTORY", "directory containing the ntuples";
321     printf "%-29s%s\n", " -p, --pyConfig FILE", "Python config used to produce ntuples";
322     printf "%-29s%s\n", " -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB";
323     }
324     elsif ($command eq "finish")
325     {
326     print "Usage: $exeName -l DIRECTORY finish NAME\n";
327    
328     print "Finalizes the database entry for dataset NAME, changing its status to\n";
329     print "\"present\". This is intended to be the final step in command-line based ntuple\n";
330     print "production.\n";
331     print "\n";
332     print "Mandatory arguments to long options are mandatory for short options too.\n";
333     printf "%-29s%s\n", " -l, --location DIRECTORY", "directory containing the ntuples";
334     }
335     elsif ($command eq "deleteEntry")
336     {
337     print "Usage: $exeName deleteEntry NAME\n";
338     print "Deletes the database entry for dataset NAME. This is intended primarily for\n";
339     print "mistaken database entries. If you wish to actually delete a set of ntuples,\n";
340     print "please use the \"deprecate\" command instead.\n";
341     }
342     elsif ($command eq "deprecate")
343     {
344     print "Usage: $exeName deprecate NAME\n";
345     print "Marks the dataset NAME for deletion. WARNING: The dataset will be deleted from\n";
346     print "the Tier 3 within one week of this action. If you wish to simply delete a\n";
347     print "mistaken database entry, use the \"deleteEntry\" command instead.\n";
348     }
349     elsif ($command eq "uploadRelease")
350     {
351     print "Usage: $exeName -p FILE -r DIRECTORY [OPTION]... uploadRelease NAME\n";
352     print "Copies an ntuple release to the appropriate area on the Tier 3, and creates a\n";
353     print "database entry for it, with NAME being the name of the release. An ntuple\n";
354     print "release is a CMSSW release with all the necessary packages added for creating\n";
355     print "ntuples, along with a default Python config file.\n";
356     print "\n";
357     print "Mandatory arguments to long options are mandatory for short options too.\n";
358     printf "%-29s%s\n", " -c, --comment COMMENT", "comment for the database entry";
359     printf "%-29s%s\n", " -f, --format FORMAT", "ntuple format (default: BEAN)";
360     printf "%-29s%s\n", " -p, --pyConfig FILE", "default Python config for the release";
361     printf "%-29s%s\n", " -r, --release DIRECTORY", "CMSSW release containing ntuple packages";
362     }
363     else
364     {
365     print "Usage: $exeName [OPTION]... COMMAND NAME\n";
366     print "Manipulates entry in the OSU Tier 3 ntuple database given by NAME.\n";
367     print "\n";
368     print "Mandatory arguments to long options are mandatory for short options too.\n";
369     printf "%-29s%s\n", " -h, --help", "print help. If COMMAND is present, print help";
370     printf "%-29s%s\n", " ", "specific to COMMAND.";
371     print "\n";
372     print "COMMAND may be one of the following:\n";
373     printf "%-29s%s\n", " create", "creates the entry";
374     printf "%-29s%s\n", " update", "updates the entry";
375     printf "%-29s%s\n", " finish", "finalizes the database entry";
376     printf "%-29s%s\n", " deleteEntry", "removes the database entry";
377     printf "%-29s%s\n", " deprecate", "marks the dataset for deletion";
378     printf "%-29s%s\n", " uploadRelease", "upload an ntuple release";
379     }
380 ahart 1.1
381     exit;
382     }
383 ahart 1.4
384     sub
385     sizeOfDataset
386     {
387     my $dataset = shift;
388     my $location = shift;
389     my $fileList = shift;
390    
391     my $query = "select nFiles from ntuple where dataset='$dataset'";
392     $db->selectdb ("ntuple");
393     my $results = $db->query ($query);
394     my @row = $results->fetchrow ();
395     my $previousNFiles = 0;
396     $previousNFiles = $row[0] if $results->numrows ();
397    
398     my $size = 0.0;
399     my $nFiles = 0;
400     if ($location && !$fileList && $previousNFiles < 1)
401     {
402     if (!(-e $location))
403     {
404     print "$location does not exist!\n";
405     exit;
406     }
407     $nFiles = `ls $location | wc -l`;
408     $size = `du -s $location`;
409     $size =~ s/([^ ]*) .*/$1/;
410     $size /= 1024 * 1024;
411     $size = sprintf "%.2f", $size;
412     }
413     elsif ($fileList)
414     {
415     foreach my $file (split (/\n/, $fileList))
416     {
417     $nFiles++;
418     my $fileSize = `du -s $file`;
419     $fileSize =~ s/([^ ]*) .*/$1/;
420     $size += $fileSize;
421     }
422     $size /= 1024 * 1024;
423     $size = sprintf "%.2f", $size;
424     }
425    
426     return ($nFiles, $size);
427     }
428    
429     sub
430     uploadRelease
431     {
432     my $format = shift;
433     my $cmsswRelease = shift;
434     my $release = shift;
435     my $psetName = shift;
436     my $name = shift;
437     my $user = shift;
438     my $comment = shift;
439    
440     move ($release, "/home/hart/public_html/releases/$name.tar.gz") or die "Ntuple releases may only be uploaded on the Tier 3, stopped";
441     if (!(-e $psetName))
442     {
443     print "$psetName does not exist!\n";
444     exit;
445     }
446     open (PY_CONFIG, "<$psetName");
447     my @pset = <PY_CONFIG>;
448     close (PY_CONFIG);
449     my $pset = join ("", @pset);
450    
451     my $query = "select max(id) from ntupleRelease";
452     $db->selectdb ("ntuple");
453     my $results = $db->query ($query);
454     my @row = $results->fetchrow ();
455     my $id = 1;
456     $id = $row[0] + 1 if $results->numrows ();
457    
458     $name = addSlashes ($name);
459     $pset = addSlashes ($pset);
460     $user = addSlashes ($user);
461     $format = addSlashes ($format);
462     $cmsswRelease = addSlashes ($cmsswRelease);
463     $comment = addSlashes ($comment);
464    
465     my $query = "insert into ntupleRelease (id, name, pset, user, pending, format, cmsswRelease, comment) values ($id, '$name', '$pset', '$user', 1, '$format', '$cmsswRelease', '$comment')";
466     $db->selectdb ("ntuple");
467     my $results = $db->query ($query);
468     }