ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osudb
Revision: 1.5
Committed: Tue Jul 31 14:55:05 2012 UTC (12 years, 9 months ago) by ahart
Branch: MAIN
Changes since 1.4: +30 -10 lines
Log Message:
Fix a bug with the fuzzy dataset names, and add an option for setting the ntuple release in the database.

File Contents

# User Rev Content
1 ahart 1.1 #!/usr/bin/env perl
2    
3     use strict;
4     use Mysql;
5     use Getopt::Long;
6 ahart 1.4 use File::Copy;
7 ahart 1.1
8     sub getDataset;
9     sub dbUpdate;
10 ahart 1.4 sub dbDelete;
11 ahart 1.1 sub addSlashes;
12 ahart 1.4 sub uploadRelease;
13 ahart 1.1
# Connect to the Tier 3 MySQL server.  The handle is a package global because
# every database helper in this script uses it.
# NOTE(review): host, user and password are hard-coded; consider reading them
# from a protected configuration file instead.
our $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";

my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "comment|c=s", "format|f=s", "pyConfig|p=s", "crabCfg|b=s", "jsonFile|j=s", "lumiSummary|s=s", "location|l=s", "fileList|t=s", "release|r=s", "help|h");
my $argc = @ARGV;

# printHelp never returns (it exits), so each check below only runs when the
# previous ones passed.
printHelp ($ARGV[0]) if $opt{"help"};
printHelp () if $argc != 2;
my %isCommand = map { $_ => 1 } qw(create update finish deprecate uploadRelease deleteEntry);
printHelp () if !$isCommand{$ARGV[0]};

if (($ARGV[0] eq "create" || $ARGV[0] eq "finish") && !$opt{"location"})
  {
    print "The directory containing the ntuples must be given!\n";
    exit 1;
  }
# Both options are required, so complain when either one is missing.
if ($ARGV[0] eq "uploadRelease" && (!$opt{"pyConfig"} || !$opt{"release"}))
  {
    print "Both the Python and the CMSSW release must be given!\n";
    exit 1;
  }

if ($ARGV[0] eq "deleteEntry")
  {
    # getDataset exits by itself when no entry is selected, so the id is
    # valid here.  The row is removed without first being updated.
    my ($id) = getDataset ($ARGV[0], $ARGV[1]);
    dbDelete ($id);
  }
elsif ($ARGV[0] ne "uploadRelease")
  {
    my $id = -1;
    my $fullDataset;
    ($id, $fullDataset) = getDataset ($ARGV[0], $ARGV[1]) if $ARGV[0] ne "create";
    if ($id < 0)
      {
        # No existing entry was chosen: fall back to creating a new one
        # under the name given on the command line.
        $fullDataset = $ARGV[1];
        $ARGV[0] = "create";
      }
    if ($ARGV[0] eq "create" && !$opt{"location"})
      {
        # Reached when "update" was turned into "create" above.
        print "The directory containing the ntuples must be given!\n";
        exit 1;
      }

    my $status = "present";
    $status = "" if $ARGV[0] eq "update";
    $status = "deprecated" if $ARGV[0] eq "deprecate";
    $opt{"format"} = "BEAN" if $ARGV[0] eq "create" && !$opt{"format"};
    # Make a relative location absolute, but only when one was given;
    # otherwise the stored location would be replaced by the current
    # directory.
    $opt{"location"} = "$ENV{'PWD'}/$opt{'location'}" if $opt{"location"} && $opt{"location"} !~ m/^\//;
    dbUpdate ($id, $fullDataset, $ENV{"USER"}, $opt{"format"}, $opt{"location"}, $opt{"fileList"}, $status, $opt{"comment"}, $opt{"pyConfig"}, $opt{"crabCfg"}, $opt{"jsonFile"}, $opt{"lumiSummary"}, $opt{"release"});
  }
else
  {
    # Directory part of the release path (everything except the CMSSW_*
    # component), with repeated slashes collapsed.
    my $parentDir = "./$opt{'release'}";
    $parentDir =~ s/CMSSW_[^\/]*//g;
    $parentDir =~ s/\/\/*/\//g;
    my $cmsswRelease = $opt{'release'};
    $cmsswRelease =~ s/^.*CMSSW_([^\/]*).*$/CMSSW_$1/;
    # List-form system keeps shell metacharacters in the arguments inert.
    # NOTE(review): the archive member is the release path as given, looked
    # up relative to $parentDir -- this looks like it only works when the
    # release directory is in the current directory; confirm.
    system ("tar", "-C", $parentDir, "-czf", "$ARGV[1].tar.gz", $opt{"release"}) == 0 or die "Failed to create $ARGV[1].tar.gz, stopped";
    $opt{"format"} = "BEAN" if !$opt{"format"};
    uploadRelease ($opt{"format"}, $cmsswRelease, "$ARGV[1].tar.gz", $opt{"pyConfig"}, $ARGV[1], $ENV{"USER"}, $opt{"comment"});
  }
60 ahart 1.1
# Escapes a string so it can be placed inside a quoted SQL string literal.
# Backslashes, single quotes and double quotes get a leading backslash, and
# NUL bytes become the two-character sequence backslash-zero.
#
# Takes the string to escape and returns the escaped copy.  An undefined
# argument is returned unchanged, so callers can still test it for truth.
sub
addSlashes
{
  my $string = shift;

  return $string if !defined $string;

  # Backslashes go first so that the backslashes added by the later
  # substitutions are not doubled again.
  $string =~ s/\\/\\\\/g;
  $string =~ s/'/\\'/g;
  $string =~ s/"/\\"/g;
  # Match the NUL byte itself; a literal backslash followed by "0" has
  # already been handled by the backslash rule above.
  $string =~ s/\0/\\0/g;

  return $string;
}
73    
# Looks up the database entry matching a (possibly partial) dataset name.
#
# Arguments:
#   $command - the command being run; "deleteEntry" and "deprecate" never
#              offer to create a new entry
#   $dataset - dataset name; "*" acts as a wildcard and the name is matched
#              anywhere inside the stored dataset name
#
# Returns (id, full dataset name) of the selected entry, or (-1, "") when
# the user chose to create a new entry.  Exits when nothing usable is
# selected.  Prompts on STDIN when zero or several entries match.
sub
getDataset
{
  my $command = shift;
  my $dataset = shift;

  my $mayCreate = $command ne "deleteEntry" && $command ne "deprecate";

  # Escape before building the pattern so a quote in the name cannot break
  # out of the SQL string literal.
  my $queryDataset = addSlashes ($dataset);
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";
  my $query = "select id,dataset,user,creationTime from ntuple where dataset like '$queryDataset' order by lastUpdateTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query) or die "Failed to query the ntuple database, stopped";
  my $nMatches = $results->numrows ();

  if ($nMatches == 1)
    {
      my @row = $results->fetchrow ();
      return ($row[0], $row[1]);
    }
  if ($nMatches == 0)
    {
      if (!$mayCreate)
        {
          print "Database entry does not exist.\n";
          exit 1;
        }
      print "Database entry does not exist. Create it? (Y/n): ";
      my $response = <STDIN>;
      $response = "" if !defined $response;  # end of input counts as the default answer
      $response =~ s/\n//g;
      $response = "y" if !$response;
      exit if substr (lc ($response), 0, 1) ne 'y';
      return (-1, "");
    }

  # Several matches: list them and let the user pick one by number.
  my %id;
  my %fullDataset;
  print "Found multiple database entries matching\n";
  print "\"$dataset\":\n";
  print "( 0) new\n" if $mayCreate;
  for my $i (1 .. $nMatches)
    {
      my @row = $results->fetchrow ();
      $id{"$i"} = $row[0];
      $fullDataset{"$i"} = $row[1];
      # The dataset name is passed as an argument, not as part of the format,
      # so a "%" in it is printed literally.
      printf "(%2d) %s\n", $i, $row[1];
      print " created by $row[2] on $row[3]\n";
    }
  print "\nWhich entry would you like to modify?";
  if ($mayCreate)
    {
      print " (Select 0 to create a new entry): "
    }
  else
    {
      print ": ";
    }
  my $response = <STDIN>;
  $response = "" if !defined $response;
  $response =~ s/[ \t\n]//g;
  # Compare as text: a numeric comparison would treat any non-numeric
  # answer as 0 and silently create a new entry.
  return (-1, "") if $response =~ m/^0+$/ && $mayCreate;
  if (!(exists $id{$response}))
    {
      print "Your selection \"$response\" was not a valid option! Quitting.\n";
      exit 1;
    }

  return ($id{$response}, $fullDataset{$response});
}
142    
# Creates or updates a row of the ntuple table.
#
# Arguments, in order: entry id (negative to create a new entry), dataset
# name, user, ntuple format, ntuple location, name of a file listing the
# ntuple files, status, comment, and the names of the Python config, CRAB
# config, JSON file and lumi summary files, followed by the ntuple release
# name.  The named files are read and their contents stored in the database.
#
# For an existing entry only the non-empty values are written.  Prints a
# message and exits when a named file or the ntuple release does not exist.
# Returns the result of the insert/update query.
sub
dbUpdate
{
  my $id = shift;
  my $dataset = shift;
  my $user = shift;
  my $format = shift;
  my $location = shift;
  my $fileListName = shift;
  my $status = shift;
  my $comment = shift;
  my $psetName = shift;
  my $crabCfgName = shift;
  my $jsonFileName = shift;
  my $lumiSummaryName = shift;
  my $release = shift;

  # Contents stay undefined for files that were not given.
  my $fileList = $fileListName ? _readFileOrExit ($fileListName) : undef;
  my $pset = $psetName ? _readFileOrExit ($psetName) : undef;
  my $crabCfg = $crabCfgName ? _readFileOrExit ($crabCfgName) : undef;
  my $jsonFile = $jsonFileName ? _readFileOrExit ($jsonFileName) : undef;
  my $lumiSummary = $lumiSummaryName ? _readFileOrExit ($lumiSummaryName) : undef;

  if ($release)
    {
      # The release name comes from the command line, so escape it before
      # it goes into the query.
      my $escapedRelease = addSlashes ($release);
      my $releaseQuery = "select id from ntupleRelease where name='$escapedRelease'";
      $db->selectdb ("ntuple");
      my $releaseResults = $db->query ($releaseQuery);
      if (!$releaseResults || $releaseResults->numrows () != 1)
        {
          print "Ntuple release \"$release\" not found!\n";
          exit 1;
        }
    }

  # Needs the raw (unescaped) names because it looks at the file system.
  my ($nFiles, $size) = sizeOfDataset ($dataset, $location, $fileList);
  # Force plain numbers: this drops any trailing newline, so that a count of
  # zero is really false and the values are clean inside the SQL text.
  $nFiles += 0;
  $size += 0;

  $dataset = addSlashes ($dataset);
  $user = addSlashes ($user);
  $format = addSlashes ($format);
  $pset = addSlashes ($pset);
  $crabCfg = addSlashes ($crabCfg);
  $jsonFile = addSlashes ($jsonFile);
  $lumiSummary = addSlashes ($lumiSummary);
  $location = addSlashes ($location);
  $fileList = addSlashes ($fileList);
  $status = addSlashes ($status);
  $comment = addSlashes ($comment);
  $release = addSlashes ($release);

  my $query;
  $db->selectdb ("ntuple");
  if ($id < 0)
    {
      # New entry: use the next free id.
      # NOTE(review): max(id) + 1 is not safe against two users creating
      # entries at the same moment; an auto-increment column would be.
      my $idResults = $db->query ("select max(id) from ntuple");
      my @row = $idResults->fetchrow ();
      my $newId = 1;
      $newId = $row[0] + 1 if $idResults->numrows ();

      $query = "insert into ntuple (id, dataset, creationTime, lastUpdateTime, user, format, location, fileList, nFiles, sizeInGB, status, comment, pset, crabCfg, jsonFile, lumiSummary, version) values ($newId, '$dataset', now(), now(), '$user', '$format', '$location', '$fileList', $nFiles, $size, '$status', '$comment', '$pset', '$crabCfg', '$jsonFile', '$lumiSummary', '$release')";
    }
  elsif ($id > 0)
    {
      # Existing entry: only touch the columns that were actually given.
      my @assignments;
      push (@assignments, "dataset='$dataset'") if $dataset;
      push (@assignments, "lastUpdateTime=now()");
      push (@assignments, "format='$format'") if $format;
      push (@assignments, "location='$location'") if $location;
      push (@assignments, "fileList='$fileList'") if $fileList;
      push (@assignments, "nFiles=$nFiles") if $nFiles;
      push (@assignments, "sizeInGB=$size") if $size;
      push (@assignments, "status='$status'") if $status;
      push (@assignments, "comment='$comment'") if $comment;
      push (@assignments, "pset='$pset'") if $pset;
      push (@assignments, "crabCfg='$crabCfg'") if $crabCfg;
      push (@assignments, "jsonFile='$jsonFile'") if $jsonFile;
      push (@assignments, "lumiSummary='$lumiSummary'") if $lumiSummary;
      push (@assignments, "version='$release'") if $release;

      my $values = join (", ", @assignments);
      $query = "update ntuple set $values where id=$id";
    }
  # An id of exactly zero matches neither case; there is nothing to run.
  return if !defined $query;

  return $db->query ($query);
}

# Returns the whole contents of the named file as one string.  Prints a
# message and exits when the file does not exist; dies when it cannot be
# opened.
sub
_readFileOrExit
{
  my $fileName = shift;

  if (!(-e $fileName))
    {
      print "$fileName does not exist!\n";
      exit 1;
    }
  open (my $fh, "<", $fileName) or die "Failed to open $fileName: $!, stopped";
  my $contents = join ("", <$fh>);
  close ($fh);

  return $contents;
}
293    
# Deletes the row with the given id from the ntuple table and returns the
# result of the delete query.  A negative id means "no entry", in which case
# nothing is done and nothing is returned.
sub
dbDelete
{
  my ($entryId) = @_;

  return if $entryId < 0;

  $db->selectdb ("ntuple");
  my $outcome = $db->query ("delete from ntuple where id=$entryId");
  return $outcome;
}
304    
# Prints usage information and exits.
#
# Takes an optional command name.  When it is one of the known commands the
# help specific to that command is printed; otherwise the general help with
# the list of commands is printed.  This subroutine never returns.
sub
printHelp
{
  my $command = shift;
  $command = "" if !defined $command;

  # Name of the script without its directory part.
  my $exeName = $0;
  $exeName =~ s/^.*\/([^\/]*)$/$1/;

  # Prints one row: the option padded to a fixed width, then its description.
  my $row = sub { printf "%-29s%s\n", $_[0], $_[1]; };
  my $mandatory = "Mandatory arguments to long options are mandatory for short options too.\n";

  if ($command eq "create" || $command eq "update")
    {
      if ($command eq "create")
        {
          print "Usage: $exeName -l DIRECTORY [OPTION]... create NAME\n";
          print "Creates an entry in the database for dataset NAME. The ntuples are assumed to\n";
          print "be present on the Tier 3, with the status being set accordingly.\n";
        }
      else
        {
          print "Usage: $exeName [OPTION]... update NAME\n";
          print "Updates an existing database entry for dataset NAME.\n";
        }
      print "\n";
      print $mandatory;
      # Both commands take the same options; only "create" has a default
      # format.
      my $formatHelp = $command eq "create" ? "ntuple format (default: BEAN)" : "ntuple format";
      $row->(" -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs");
      $row->(" -c, --comment COMMENT", "comment for the database entry");
      $row->(" -f, --format FORMAT", $formatHelp);
      $row->(" -j, --jsonFile FILE", "JSON file used for this dataset");
      $row->(" -l, --location DIRECTORY", "directory containing the ntuples");
      $row->(" -p, --pyConfig FILE", "Python config used to produce ntuples");
      $row->(" -r, --release NAME", "ntuple release used to produce ntuples");
      $row->(" -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB");
      $row->(" -t, --fileList FILE", "file listing the ntuple files, one per line");
    }
  elsif ($command eq "finish")
    {
      print "Usage: $exeName -l DIRECTORY finish NAME\n";
      print "Finalizes the database entry for dataset NAME, changing its status to\n";
      print "\"present\". This is intended to be the final step in command-line based ntuple\n";
      print "production.\n";
      print "\n";
      print $mandatory;
      $row->(" -l, --location DIRECTORY", "directory containing the ntuples");
    }
  elsif ($command eq "deleteEntry")
    {
      print "Usage: $exeName deleteEntry NAME\n";
      print "Deletes the database entry for dataset NAME. This is intended primarily for\n";
      print "mistaken database entries. If you wish to actually delete a set of ntuples,\n";
      print "please use the \"deprecate\" command instead.\n";
    }
  elsif ($command eq "deprecate")
    {
      print "Usage: $exeName deprecate NAME\n";
      print "Marks the dataset NAME for deletion. WARNING: The dataset will be deleted from\n";
      print "the Tier 3 within one week of this action. If you wish to simply delete a\n";
      print "mistaken database entry, use the \"deleteEntry\" command instead.\n";
    }
  elsif ($command eq "uploadRelease")
    {
      print "Usage: $exeName -p FILE -r DIRECTORY [OPTION]... uploadRelease NAME\n";
      print "Copies an ntuple release to the appropriate area on the Tier 3, and creates a\n";
      print "database entry for it, with NAME being the name of the release. An ntuple\n";
      print "release is a CMSSW release with all the necessary packages added for creating\n";
      print "ntuples, along with a default Python config file.\n";
      print "\n";
      print $mandatory;
      $row->(" -c, --comment COMMENT", "comment for the database entry");
      $row->(" -f, --format FORMAT", "ntuple format (default: BEAN)");
      $row->(" -p, --pyConfig FILE", "default Python config for the release");
      $row->(" -r, --release DIRECTORY", "CMSSW release containing ntuple packages");
    }
  else
    {
      print "Usage: $exeName [OPTION]... COMMAND NAME\n";
      print "Manipulates entry in the OSU Tier 3 ntuple database given by NAME.\n";
      print "\n";
      print $mandatory;
      $row->(" -h, --help", "print help. If COMMAND is present, print help");
      $row->(" ", "specific to COMMAND.");
      print "\n";
      print "COMMAND may be one of the following:\n";
      $row->(" create", "creates the entry");
      $row->(" update", "updates the entry");
      $row->(" finish", "finalizes the database entry");
      $row->(" deleteEntry", "removes the database entry");
      $row->(" deprecate", "marks the dataset for deletion");
      $row->(" uploadRelease", "upload an ntuple release");
    }

  exit;
}
403 ahart 1.4
# Counts the files of a dataset and measures their total size on disk.
#
# Arguments:
#   $dataset  - dataset name, used to look up the file count already stored
#   $location - directory containing the ntuples
#   $fileList - text with one file name per line (may be empty)
#
# With a file list, the listed files are counted and measured.  Without one,
# the location is scanned, but only when the database does not already hold
# a file count for the dataset.  Otherwise nothing is measured.
#
# Returns (number of files, size in GB with two decimals).  Prints a message
# and exits when the location has to be scanned but does not exist.
sub
sizeOfDataset
{
  my $dataset = shift;
  my $location = shift;
  my $fileList = shift;

  # The name arrives unescaped, so escape it for the query.
  my $escapedDataset = addSlashes ($dataset);
  my $query = "select nFiles from ntuple where dataset='$escapedDataset'";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);
  my @row = $results->fetchrow ();
  my $previousNFiles = 0;
  $previousNFiles = $row[0] if $results->numrows ();
  $previousNFiles = 0 if !defined $previousNFiles;

  my $size = 0.0;
  my $nFiles = 0;
  if ($location && !$fileList && $previousNFiles < 1)
    {
      if (!(-e $location))
        {
          print "$location does not exist!\n";
          exit 1;
        }
      if (-d $location)
        {
          # Count the visible entries, as a plain "ls" would.
          opendir (my $dir, $location) or die "Failed to read $location: $!, stopped";
          $nFiles = grep { !m/^\./ } readdir ($dir);
          closedir ($dir);
        }
      else
        {
          $nFiles = 1;
        }
      $size = sprintf "%.2f", _diskUsageKB ($location) / (1024 * 1024);
    }
  elsif ($fileList)
    {
      my $totalKB = 0;
      foreach my $file (split (/\n/, $fileList))
        {
          $file =~ s/^\s+|\s+$//g;
          # A blank line names no file; measuring it would measure the
          # current directory instead.
          next if $file eq "";
          $nFiles++;
          $totalKB += _diskUsageKB ($file);
        }
      $size = sprintf "%.2f", $totalKB / (1024 * 1024);
    }

  return ($nFiles, $size);
}

# Returns the disk usage of a path in kilobytes as reported by "du -sk", or
# 0 when it cannot be determined.  The command is run without a shell so
# that spaces or special characters in the path are harmless.
sub
_diskUsageKB
{
  my $path = shift;

  open (my $du, "-|", "du", "-sk", "--", $path) or return 0;
  my $output = <$du>;
  close ($du);

  return 0 if !defined $output;
  # The size is the leading number; the path follows after a tab.
  return $output =~ m/^(\d+)/ ? $1 : 0;
}
448    
# Installs an ntuple release tarball and records it in the ntupleRelease
# table as pending.
#
# Arguments, in order: ntuple format, CMSSW release name, path of the
# tarball to install, name of the default Python config file, release name,
# user, and comment.
#
# Prints a message and exits when the Python config does not exist; dies
# when it cannot be opened or the tarball cannot be moved into place.
# Returns the result of the insert query.
sub
uploadRelease
{
  my $format = shift;
  my $cmsswRelease = shift;
  my $release = shift;
  my $psetName = shift;
  my $name = shift;
  my $user = shift;
  my $comment = shift;

  # Read the config before touching the tarball, so that a missing config
  # does not leave an installed release without a database entry.
  if (!(-e $psetName))
    {
      print "$psetName does not exist!\n";
      exit 1;
    }
  open (my $pyConfig, "<", $psetName) or die "Failed to open $psetName: $!, stopped";
  my $pset = join ("", <$pyConfig>);
  close ($pyConfig);

  move ($release, "/home/hart/public_html/releases/$name.tar.gz") or die "Ntuple releases may only be uploaded on the Tier 3, stopped";

  # Use the next free id.
  $db->selectdb ("ntuple");
  my $idResults = $db->query ("select max(id) from ntupleRelease");
  my @row = $idResults->fetchrow ();
  my $id = 1;
  $id = $row[0] + 1 if $idResults->numrows ();

  $name = addSlashes ($name);
  $pset = addSlashes ($pset);
  $user = addSlashes ($user);
  $format = addSlashes ($format);
  $cmsswRelease = addSlashes ($cmsswRelease);
  $comment = addSlashes ($comment);

  my $query = "insert into ntupleRelease (id, name, pset, user, pending, format, cmsswRelease, comment) values ($id, '$name', '$pset', '$user', 1, '$format', '$cmsswRelease', '$comment')";
  return $db->query ($query);
}