ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osudb
Revision: 1.4
Committed: Tue Jul 31 14:12:19 2012 UTC (12 years, 9 months ago) by ahart
Branch: MAIN
Changes since 1.3: +271 -60 lines
Log Message:
Add database entry deletion and release upload, connect to the database once, and add error checking.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Mysql;
5 use Getopt::Long;
6 use File::Copy;
7
8 sub getDataset;
9 sub dbUpdate;
10 sub dbDelete;
11 sub addSlashes;
12 sub uploadRelease;
13
14 our $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";
15
16 my %opt;
17 Getopt::Long::Configure ("bundling");
18 GetOptions (\%opt, "comment|c=s", "format|f=s", "pyConfig|p=s", "crabCfg|b=s", "jsonFile|j=s", "lumiSummary|s=s", "location|l=s", "fileList|t=s", "release|r=s", "help|h");
19 my $argc = @ARGV;
20
21 printHelp ($ARGV[0]) if $opt{"help"};
22 printHelp () if $argc != 2;
23 printHelp () if $ARGV[0] ne "create" && $ARGV[0] ne "update" && $ARGV[0] ne "finish" && $ARGV[0] ne "deprecate" && $ARGV[0] ne "uploadRelease" && $ARGV[0] ne "deleteEntry";
24 if (($ARGV[0] eq "create" || $ARGV[0] eq "finish") && !$opt{"location"})
25 {
26 print "The directory containing the ntuples must be given!\n";
27 exit;
28 }
29 if ($ARGV[0] eq "uploadRelease" && !$opt{"pyConfig"} && !$opt{"release"})
30 {
31 print "Both the Python and the CMSSW release must be given!\n";
32 exit;
33 }
34 if ($ARGV[0] ne "uploadRelease")
35 {
36 my $id = -1;
37 $id = getDataset ($ARGV[0], $ARGV[1]) if $ARGV[0] ne "create";
38 $ARGV[0] = "create" if $id < 0;
39 my $status = "present";
40 $status = "" if $ARGV[0] eq "update";
41 $status = "deprecated" if $ARGV[0] eq "deprecate";
42 $opt{"format"} = "BEAN" if $ARGV[0] eq "create" && !$opt{"format"};
43 $opt{"location"} = "$ENV{'PWD'}/$opt{'location'}" if !($opt{"location"} =~ m/^\//);
44 dbUpdate ($id, $ARGV[1], $ENV{"USER"}, $opt{"format"}, $opt{"location"}, $opt{"fileList"}, $status, $opt{"comment"}, $opt{"pyConfig"}, $opt{"crabCfg"}, $opt{"jsonFile"}, $opt{"lumiSummary"});
45 dbDelete ($id) if $ARGV[0] eq "deleteEntry";
46 }
47 else
48 {
49 my $parentDir = "./$opt{'release'}";
50 $parentDir =~ s/CMSSW_[^\/]*//g;
51 $parentDir =~ s/\/\/*/\//g;
52 my $cmsswRelease = $opt{'release'};
53 $cmsswRelease =~ s/^.*CMSSW_([^\/]*).*$/CMSSW_$1/;
54 `tar -C $parentDir -czf $ARGV[1].tar.gz $opt{"release"}`;
55 $opt{"format"} = "BEAN" if !$opt{"format"};
56 uploadRelease ($opt{"format"}, $cmsswRelease, "$ARGV[1].tar.gz", $opt{"pyConfig"}, $ARGV[1], $ENV{"USER"}, $opt{"comment"});
57 }
58
59 sub
60 addSlashes
61 {
62 my $string = shift;
63
64 $string =~ s/\\/\\\\/g;
65 $string =~ s/'/\\'/g;
66 $string =~ s/"/\\"/g;
67 $string =~ s/\\0/\\\\0/g;
68
69 return $string;
70 }
71
72 sub
73 getDataset
74 {
75 my $command = shift;
76 my $dataset = shift;
77
78 my $results;
79 my $queryDataset = $dataset;
80 $queryDataset =~ s/\*/%/g;
81 $queryDataset =~ s/(.*)/%$1%/g;
82 my $query = "select id,dataset,user,creationTime from ntuple where dataset like '$queryDataset' order by lastUpdateTime";
83 $db->selectdb ("ntuple");
84 $results = $db->query ($query);
85 if ($results->numrows () == 1)
86 {
87 my @row = $results->fetchrow ();
88 return $row[0];
89 }
90 if ($results->numrows () == 0)
91 {
92 if ($command ne "deleteEntry" && $command ne "deprecate")
93 {
94 print "Database entry does not exist. Create it? (Y/n): ";
95 my $response = <STDIN>;
96 $response =~ s/\n//g;
97 $response = "y" if !$response;
98 exit if substr (lc ($response), 0, 1) ne 'y';
99 return -1;
100 }
101 else
102 {
103 print "Database entry does not exist.\n";
104 exit;
105 }
106 }
107 my %map;
108 print "Found multiple database entries matching\n";
109 print "\"$dataset\":\n";
110 print "( 0) new\n" if $command ne "deleteEntry" && $command ne "deprecate";
111 for (my $i = 1; $i <= $results->numrows (); $i++)
112 {
113 my @row = $results->fetchrow ();
114 $map{"$i"} = $row[0];
115 printf "(%2d) $row[1]\n", $i;
116 print " created by $row[2] on $row[3]\n";
117 }
118 print "\nWhich entry would you like to modify?";
119 if ($command ne "deleteEntry" && $command ne "deprecate")
120 {
121 print " (Select 0 to create a new entry): "
122 }
123 else
124 {
125 print ": ";
126 }
127 my $response = <STDIN>;
128 $response =~ s/[ \t\n]//g;
129 return -1 if $response == 0 && $command ne "deleteEntry" && $command ne "deprecate";
130 if (!(exists $map{$response}))
131 {
132 print "Your selection \"$response\" was not a valid option! Quitting.\n";
133 exit;
134 }
135
136 return $map{$response};
137 }
138
139 sub
140 dbUpdate
141 {
142 my $id = shift;
143 my $dataset = shift;
144 my $user = shift;
145 my $format = shift;
146 my $location = shift;
147 my $fileListName = shift;
148 my $status = shift;
149 my $comment = shift;
150 my $psetName = shift;
151 my $crabCfgName = shift;
152 my $jsonFileName = shift;
153 my $lumiSummaryName = shift;
154
155 my $fileList;
156 my $pset;
157 my $crabCfg;
158 my $jsonFile;
159 my $lumiSummary;
160 my $size = 0;
161 my $nFiles = 0;
162 if ($fileListName)
163 {
164 if (!(-e $fileListName))
165 {
166 print "$fileListName does not exist!\n";
167 exit;
168 }
169 open (FILE_LIST, "<$fileListName");
170 my @fileList = <FILE_LIST>;
171 close (FILE_LIST);
172 $fileList = join ("", @fileList);
173 }
174 if ($psetName)
175 {
176 if (!(-e $psetName))
177 {
178 print "$psetName does not exist!\n";
179 exit;
180 }
181 open (PY_CONFIG, "<$psetName");
182 my @pset = <PY_CONFIG>;
183 close (PY_CONFIG);
184 $pset = join ("", @pset);
185 }
186 if ($crabCfgName)
187 {
188 if (!(-e $crabCfgName))
189 {
190 print "$crabCfgName does not exist!\n";
191 exit;
192 }
193 open (CRAB_CONFIG, "<$crabCfgName");
194 my @crabCfg = <CRAB_CONFIG>;
195 close (CRAB_CONFIG);
196 $crabCfg = join ("", @crabCfg);
197 }
198 if ($jsonFileName)
199 {
200 if (!(-e $jsonFileName))
201 {
202 print "$jsonFileName does not exist!\n";
203 exit;
204 }
205 open (JSON_FILE, "<$jsonFileName");
206 my @jsonFile = <JSON_FILE>;
207 close (JSON_FILE);
208 $jsonFile = join ("", @jsonFile);
209 }
210 if ($lumiSummaryName)
211 {
212 if (!(-e $lumiSummaryName))
213 {
214 print "$lumiSummaryName does not exist!\n";
215 exit;
216 }
217 open (LUMI_SUMMARY, "<$lumiSummaryName");
218 my @lumiSummary = <LUMI_SUMMARY>;
219 close (LUMI_SUMMARY);
220 $lumiSummary = join ("", @lumiSummary);
221 }
222 ($nFiles, $size) = sizeOfDataset ($dataset, $location, $fileList);
223
224 $dataset = addSlashes ($dataset);
225 $user = addSlashes ($user);
226 $format = addSlashes ($format);
227 $pset = addSlashes ($pset);
228 $crabCfg = addSlashes ($crabCfg);
229 $jsonFile = addSlashes ($jsonFile);
230 $lumiSummary = addSlashes ($lumiSummary);
231 $location = addSlashes ($location);
232 $fileList = addSlashes ($fileList);
233 $status = addSlashes ($status);
234 $comment = addSlashes ($comment);
235
236 my $results;
237 my $query;
238 if ($id < 0)
239 {
240 $query = "select max(id) from ntuple";
241 $db->selectdb ("ntuple");
242 $results = $db->query ($query);
243 my @row = $results->fetchrow ();
244 my $id = 1;
245 $id = $row[0] + 1 if $results->numrows ();
246
247 $query = "insert into ntuple (id, dataset, creationTime, lastUpdateTime, user, format, location, fileList, nFiles, sizeInGB, status, comment, pset, crabCfg, jsonFile, lumiSummary) values ($id, '$dataset', now(), now(), '$user', '$format', '$location', '$fileList', $nFiles, $size, '$status', '$comment', '$pset', '$crabCfg', '$jsonFile', '$lumiSummary')";
248 }
249 if ($id > 0)
250 {
251 my $fields;
252 my $values;
253
254 $values .= ", dataset='$dataset'" if $dataset;
255 $values .= ", lastUpdateTime=now()";
256 $values .= ", format='$format'" if $format;
257 $values .= ", location='$location'" if $location;
258 $values .= ", fileList='$fileList'" if $fileList;
259 $values .= ", nFiles=$nFiles" if $nFiles;
260 $values .= ", sizeInGB=$size" if $size;
261 $values .= ", status='$status'" if $status;
262 $values .= ", comment='$comment'" if $comment;
263 $values .= ", pset='$pset'" if $pset;
264 $values .= ", crabCfg='$crabCfg'" if $crabCfg;
265 $values .= ", jsonFile='$jsonFile'" if $jsonFile;
266 $values .= ", lumiSummary='$lumiSummary'" if $lumiSummary;
267
268 $values =~ s/^, //;
269 $query = "update ntuple set $values where id=$id";
270 }
271 $results = $db->query ($query);
272
273 return $results;
274 }
275
276 sub
277 dbDelete
278 {
279 my $id = shift;
280
281 return if $id < 0;
282 my $query = "delete from ntuple where id=$id";
283 $db->selectdb ("ntuple");
284 my $results = $db->query ($query);
285 }
286
287 sub
288 printHelp
289 {
290 my $command = shift;
291
292 my $exeName = $0;
293 $exeName =~ s/^.*\/([^\/]*)$/$1/;
294
295 if ($command eq "create")
296 {
297 print "Usage: $exeName -l DIRECTORY [OPTION]... create NAME\n";
298 print "Creates an entry in the database for dataset NAME. The ntuples are assumed to\n";
299 print "be present on the Tier 3, with the status being set accordingly.\n";
300 print "\n";
301 print "Mandatory arguments to long options are mandatory for short options too.\n";
302 printf "%-29s%s\n", " -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs";
303 printf "%-29s%s\n", " -c, --comment COMMENT", "comment for the database entry";
304 printf "%-29s%s\n", " -f, --format FORMAT", "ntuple format (default: BEAN)";
305 printf "%-29s%s\n", " -j, --jsonFile FILE", "JSON file used for this dataset";
306 printf "%-29s%s\n", " -l, --location DIRECTORY", "directory containing the ntuples";
307 printf "%-29s%s\n", " -p, --pyConfig FILE", "Python config used to produce ntuples";
308 printf "%-29s%s\n", " -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB";
309 }
310 elsif ($command eq "update")
311 {
312 print "Usage: $exeName [OPTION]... update NAME\n";
313 print "Updates an existing database entry for dataset NAME.\n";
314 print "\n";
315 print "Mandatory arguments to long options are mandatory for short options too.\n";
316 printf "%-29s%s\n", " -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs";
317 printf "%-29s%s\n", " -c, --comment COMMENT", "comment for the database entry";
318 printf "%-29s%s\n", " -f, --format FORMAT", "ntuple format";
319 printf "%-29s%s\n", " -j, --jsonFile FILE", "JSON file used for this dataset";
320 printf "%-29s%s\n", " -l, --location DIRECTORY", "directory containing the ntuples";
321 printf "%-29s%s\n", " -p, --pyConfig FILE", "Python config used to produce ntuples";
322 printf "%-29s%s\n", " -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB";
323 }
324 elsif ($command eq "finish")
325 {
326 print "Usage: $exeName -l DIRECTORY finish NAME\n";
327
328 print "Finalizes the database entry for dataset NAME, changing its status to\n";
329 print "\"present\". This is intended to be the final step in command-line based ntuple\n";
330 print "production.\n";
331 print "\n";
332 print "Mandatory arguments to long options are mandatory for short options too.\n";
333 printf "%-29s%s\n", " -l, --location DIRECTORY", "directory containing the ntuples";
334 }
335 elsif ($command eq "deleteEntry")
336 {
337 print "Usage: $exeName deleteEntry NAME\n";
338 print "Deletes the database entry for dataset NAME. This is intended primarily for\n";
339 print "mistaken database entries. If you wish to actually delete a set of ntuples,\n";
340 print "please use the \"deprecate\" command instead.\n";
341 }
342 elsif ($command eq "deprecate")
343 {
344 print "Usage: $exeName deprecate NAME\n";
345 print "Marks the dataset NAME for deletion. WARNING: The dataset will be deleted from\n";
346 print "the Tier 3 within one week of this action. If you wish to simply delete a\n";
347 print "mistaken database entry, use the \"deleteEntry\" command instead.\n";
348 }
349 elsif ($command eq "uploadRelease")
350 {
351 print "Usage: $exeName -p FILE -r DIRECTORY [OPTION]... uploadRelease NAME\n";
352 print "Copies an ntuple release to the appropriate area on the Tier 3, and creates a\n";
353 print "database entry for it, with NAME being the name of the release. An ntuple\n";
354 print "release is a CMSSW release with all the necessary packages added for creating\n";
355 print "ntuples, along with a default Python config file.\n";
356 print "\n";
357 print "Mandatory arguments to long options are mandatory for short options too.\n";
358 printf "%-29s%s\n", " -c, --comment COMMENT", "comment for the database entry";
359 printf "%-29s%s\n", " -f, --format FORMAT", "ntuple format (default: BEAN)";
360 printf "%-29s%s\n", " -p, --pyConfig FILE", "default Python config for the release";
361 printf "%-29s%s\n", " -r, --release DIRECTORY", "CMSSW release containing ntuple packages";
362 }
363 else
364 {
365 print "Usage: $exeName [OPTION]... COMMAND NAME\n";
366 print "Manipulates entry in the OSU Tier 3 ntuple database given by NAME.\n";
367 print "\n";
368 print "Mandatory arguments to long options are mandatory for short options too.\n";
369 printf "%-29s%s\n", " -h, --help", "print help. If COMMAND is present, print help";
370 printf "%-29s%s\n", " ", "specific to COMMAND.";
371 print "\n";
372 print "COMMAND may be one of the following:\n";
373 printf "%-29s%s\n", " create", "creates the entry";
374 printf "%-29s%s\n", " update", "updates the entry";
375 printf "%-29s%s\n", " finish", "finalizes the database entry";
376 printf "%-29s%s\n", " deleteEntry", "removes the database entry";
377 printf "%-29s%s\n", " deprecate", "marks the dataset for deletion";
378 printf "%-29s%s\n", " uploadRelease", "upload an ntuple release";
379 }
380
381 exit;
382 }
383
384 sub
385 sizeOfDataset
386 {
387 my $dataset = shift;
388 my $location = shift;
389 my $fileList = shift;
390
391 my $query = "select nFiles from ntuple where dataset='$dataset'";
392 $db->selectdb ("ntuple");
393 my $results = $db->query ($query);
394 my @row = $results->fetchrow ();
395 my $previousNFiles = 0;
396 $previousNFiles = $row[0] if $results->numrows ();
397
398 my $size = 0.0;
399 my $nFiles = 0;
400 if ($location && !$fileList && $previousNFiles < 1)
401 {
402 if (!(-e $location))
403 {
404 print "$location does not exist!\n";
405 exit;
406 }
407 $nFiles = `ls $location | wc -l`;
408 $size = `du -s $location`;
409 $size =~ s/([^ ]*) .*/$1/;
410 $size /= 1024 * 1024;
411 $size = sprintf "%.2f", $size;
412 }
413 elsif ($fileList)
414 {
415 foreach my $file (split (/\n/, $fileList))
416 {
417 $nFiles++;
418 my $fileSize = `du -s $file`;
419 $fileSize =~ s/([^ ]*) .*/$1/;
420 $size += $fileSize;
421 }
422 $size /= 1024 * 1024;
423 $size = sprintf "%.2f", $size;
424 }
425
426 return ($nFiles, $size);
427 }
428
429 sub
430 uploadRelease
431 {
432 my $format = shift;
433 my $cmsswRelease = shift;
434 my $release = shift;
435 my $psetName = shift;
436 my $name = shift;
437 my $user = shift;
438 my $comment = shift;
439
440 move ($release, "/home/hart/public_html/releases/$name.tar.gz") or die "Ntuple releases may only be uploaded on the Tier 3, stopped";
441 if (!(-e $psetName))
442 {
443 print "$psetName does not exist!\n";
444 exit;
445 }
446 open (PY_CONFIG, "<$psetName");
447 my @pset = <PY_CONFIG>;
448 close (PY_CONFIG);
449 my $pset = join ("", @pset);
450
451 my $query = "select max(id) from ntupleRelease";
452 $db->selectdb ("ntuple");
453 my $results = $db->query ($query);
454 my @row = $results->fetchrow ();
455 my $id = 1;
456 $id = $row[0] + 1 if $results->numrows ();
457
458 $name = addSlashes ($name);
459 $pset = addSlashes ($pset);
460 $user = addSlashes ($user);
461 $format = addSlashes ($format);
462 $cmsswRelease = addSlashes ($cmsswRelease);
463 $comment = addSlashes ($comment);
464
465 my $query = "insert into ntupleRelease (id, name, pset, user, pending, format, cmsswRelease, comment) values ($id, '$name', '$pset', '$user', 1, '$format', '$cmsswRelease', '$comment')";
466 $db->selectdb ("ntuple");
467 my $results = $db->query ($query);
468 }