ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/osudb
Revision: 1.5
Committed: Tue Jul 31 14:55:05 2012 UTC (12 years, 9 months ago) by ahart
Branch: MAIN
Changes since 1.4: +30 -10 lines
Log Message:
Fix a bug with the fuzzy dataset names, and add an option for setting the ntuple release in the database.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Mysql;
5 use Getopt::Long;
6 use File::Copy;
7
8 sub getDataset;
9 sub dbUpdate;
10 sub dbDelete;
11 sub addSlashes;
12 sub uploadRelease;
13
14 our $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";
15
# Main script flow: parse the command line, validate the COMMAND/NAME pair,
# then either manipulate an entry in the ntuple table or upload a release.
my %opt;
Getopt::Long::Configure ("bundling");
# An unrecognized or malformed option prints the general help instead of
# being silently ignored.
GetOptions (\%opt, "comment|c=s", "format|f=s", "pyConfig|p=s", "crabCfg|b=s", "jsonFile|j=s", "lumiSummary|s=s", "location|l=s", "fileList|t=s", "release|r=s", "help|h") or printHelp ();
my $argc = @ARGV;

printHelp ($ARGV[0]) if $opt{"help"};
printHelp () if $argc != 2;

my %isCommand = map { $_ => 1 } qw(create update finish deprecate uploadRelease deleteEntry);
my $command = $ARGV[0];
my $name = $ARGV[1];
printHelp () if !$isCommand{$command};

if (($command eq "create" || $command eq "finish") && !$opt{"location"})
{
  print "The directory containing the ntuples must be given!\n";
  exit;
}
# Both options are required, so the check must fail when EITHER is missing.
if ($command eq "uploadRelease" && (!$opt{"pyConfig"} || !$opt{"release"}))
{
  print "Both the Python and the CMSSW release must be given!\n";
  exit;
}

if ($command ne "uploadRelease")
{
  my $id = -1;
  my $fullDataset;
  ($id, $fullDataset) = getDataset ($command, $name) if $command ne "create";
  # A negative id means no existing entry was selected: fall back to creating
  # a new entry under the name given on the command line.
  if ($id < 0)
  {
    $fullDataset = $name;
    $command = "create";
  }

  if ($command eq "deleteEntry")
  {
    # The row is about to be removed, so there is nothing worth updating.
    dbDelete ($id);
  }
  else
  {
    my $status = "present";
    $status = "" if $command eq "update";
    $status = "deprecated" if $command eq "deprecate";
    $opt{"format"} = "BEAN" if $command eq "create" && !$opt{"format"};
    # Make a relative location absolute, but only when one was actually
    # given; otherwise an update would overwrite the stored location with
    # the current working directory.
    $opt{"location"} = "$ENV{'PWD'}/$opt{'location'}" if $opt{"location"} && $opt{"location"} !~ m/^\//;
    dbUpdate ($id, $fullDataset, $ENV{"USER"}, $opt{"format"}, $opt{"location"}, $opt{"fileList"}, $status, $opt{"comment"}, $opt{"pyConfig"}, $opt{"crabCfg"}, $opt{"jsonFile"}, $opt{"lumiSummary"}, $opt{"release"});
  }
}
else
{
  # Directory handed to "tar -C": the release path with the CMSSW_* component removed.
  my $parentDir = "./$opt{'release'}";
  $parentDir =~ s/CMSSW_[^\/]*//g;
  $parentDir =~ s/\/\/*/\//g;
  # Bare CMSSW_* release name extracted from the path.
  my $cmsswRelease = $opt{'release'};
  $cmsswRelease =~ s/^.*CMSSW_([^\/]*).*$/CMSSW_$1/;
  # List-form system bypasses the shell, so odd characters in the paths are
  # passed to tar verbatim; a failed archive is reported immediately.
  system ("tar", "-C", $parentDir, "-czf", "$name.tar.gz", $opt{"release"}) == 0 or die "Failed to create $name.tar.gz, stopped";
  $opt{"format"} = "BEAN" if !$opt{"format"};
  uploadRelease ($opt{"format"}, $cmsswRelease, "$name.tar.gz", $opt{"pyConfig"}, $name, $ENV{"USER"}, $opt{"comment"});
}
60
# Escapes a string for inclusion inside a quoted SQL string literal.
#
# Backslashes, single quotes and double quotes are prefixed with a backslash,
# and NUL bytes are replaced by the two-character sequence backslash-zero.
# An undefined argument is returned unchanged.
sub
addSlashes
{
  my $string = shift;

  return $string if !defined $string;

  # One pass over the original characters, so backslashes added here are
  # never themselves re-escaped.
  $string =~ s/([\\'"])/\\$1/g;
  # Match the real NUL byte; the literal text backslash-zero was already
  # handled above by doubling its backslash.
  $string =~ s/\0/\\0/g;

  return $string;
}
73
# Looks up the database entry matching a (possibly fuzzy) dataset name.
#
# Arguments: the command being executed and the dataset name, in which "*"
# acts as a wildcard; the name is also matched as a substring.
#
# Returns (id, dataset) for the selected entry, or (-1, "") when the user
# chooses to create a new entry. Exits when nothing matches and the command
# is "deleteEntry" or "deprecate", when the user declines to create an entry,
# or when the menu selection is invalid.
sub
getDataset
{
  my $command = shift;
  my $dataset = shift;

  # Only commands other than deleteEntry/deprecate may create a new entry.
  my $mayCreate = $command ne "deleteEntry" && $command ne "deprecate";

  # Escape first so the user-supplied name cannot break out of the quoted
  # SQL literal, then translate the wildcard and wrap for substring matching.
  my $queryDataset = addSlashes ($dataset);
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";
  my $query = "select id,dataset,user,creationTime from ntuple where dataset like '$queryDataset' order by lastUpdateTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query) or die "Failed to query the ntuple database, stopped";
  my $nRows = $results->numrows ();

  if ($nRows == 1)
  {
    my @row = $results->fetchrow ();
    return ($row[0], $row[1]);
  }
  if ($nRows == 0)
  {
    if (!$mayCreate)
    {
      print "Database entry does not exist.\n";
      exit;
    }
    print "Database entry does not exist. Create it? (Y/n): ";
    my $response = <STDIN>;
    $response = "" if !defined $response;
    $response =~ s/\n//g;
    $response = "y" if !$response;
    exit if substr (lc ($response), 0, 1) ne 'y';
    return (-1, "");
  }

  # Several matches: show a numbered menu and let the user pick one.
  my %id;
  my %fullDataset;
  print "Found multiple database entries matching\n";
  print "\"$dataset\":\n";
  print "( 0) new\n" if $mayCreate;
  for (my $i = 1; $i <= $nRows; $i++)
  {
    my @row = $results->fetchrow ();
    $id{"$i"} = $row[0];
    $fullDataset{"$i"} = $row[1];
    # The dataset name is passed as data, never as part of the format.
    printf "(%2d) %s\n", $i, $row[1];
    print " created by $row[2] on $row[3]\n";
  }
  print "\nWhich entry would you like to modify?";
  if ($mayCreate)
  {
    print " (Select 0 to create a new entry): "
  }
  else
  {
    print ": ";
  }
  my $response = <STDIN>;
  $response = "" if !defined $response;
  $response =~ s/[ \t\n]//g;
  # Compare as text: a numeric comparison would treat any non-numeric or
  # empty reply as zero and silently create a new entry.
  return (-1, "") if $mayCreate && $response =~ m/^0+$/;
  if (!(exists $id{$response}))
  {
    print "Your selection \"$response\" was not a valid option! Quitting.\n";
    exit;
  }

  return ($id{$response}, $fullDataset{$response});
}
142
# Reads a whole file into a string for dbUpdate.
#
# Returns nothing when no file name is given. Prints a message and exits
# when the file does not exist; dies when it exists but cannot be opened.
sub
slurpOrExit
{
  my $fileName = shift;

  return if !$fileName;
  if (!(-e $fileName))
  {
    print "$fileName does not exist!\n";
    exit;
  }
  open (my $handle, "<", $fileName) or die "Failed to open $fileName: $!, stopped";
  my $contents = do { local $/; <$handle> };
  close ($handle);
  $contents = "" if !defined $contents;

  return $contents;
}

# Inserts a new row into the ntuple table (id < 0) or updates the existing
# row with the given id (id > 0).
#
# Arguments, in order: id, dataset, user, format, location, file-list file
# name, status, comment, Python config file name, CRAB config file name, JSON
# file name, lumi summary file name, ntuple release name. The file-name
# arguments are read from disk and their contents are stored. On update, only
# non-empty values are written and lastUpdateTime is always refreshed.
#
# Returns the result of the insert/update query, or nothing when id is 0.
# Exits when a named file is missing or the release is not found in the
# ntupleRelease table.
sub
dbUpdate
{
  my $id = shift;
  my $dataset = shift;
  my $user = shift;
  my $format = shift;
  my $location = shift;
  my $fileListName = shift;
  my $status = shift;
  my $comment = shift;
  my $psetName = shift;
  my $crabCfgName = shift;
  my $jsonFileName = shift;
  my $lumiSummaryName = shift;
  my $release = shift;

  my $fileList = slurpOrExit ($fileListName);
  my $pset = slurpOrExit ($psetName);
  my $crabCfg = slurpOrExit ($crabCfgName);
  my $jsonFile = slurpOrExit ($jsonFileName);
  my $lumiSummary = slurpOrExit ($lumiSummaryName);

  if ($release)
  {
    # Escape before interpolating: the name comes from the command line.
    my $escapedRelease = addSlashes ($release);
    my $query = "select id from ntupleRelease where name='$escapedRelease'";
    $db->selectdb ("ntuple");
    my $results = $db->query ($query);
    if (!$results || $results->numrows () != 1)
    {
      print "Ntuple release \"$release\" not found!\n";
      exit;
    }
  }

  # Must run on the raw values, before they are escaped for SQL.
  my ($nFiles, $size) = sizeOfDataset ($dataset, $location, $fileList);
  # Normalize the count to a bare integer (drops any stray whitespace).
  $nFiles = int ($nFiles);

  $dataset = addSlashes ($dataset);
  $user = addSlashes ($user);
  $format = addSlashes ($format);
  $pset = addSlashes ($pset);
  $crabCfg = addSlashes ($crabCfg);
  $jsonFile = addSlashes ($jsonFile);
  $lumiSummary = addSlashes ($lumiSummary);
  $location = addSlashes ($location);
  $fileList = addSlashes ($fileList);
  $status = addSlashes ($status);
  $comment = addSlashes ($comment);
  $release = addSlashes ($release);

  my $query;
  if ($id < 0)
  {
    # New entry: allocate the next id after the current maximum.
    $db->selectdb ("ntuple");
    my $results = $db->query ("select max(id) from ntuple");
    my @row = $results->fetchrow ();
    my $newId = 1;
    $newId = $row[0] + 1 if $results->numrows ();

    $query = "insert into ntuple (id, dataset, creationTime, lastUpdateTime, user, format, location, fileList, nFiles, sizeInGB, status, comment, pset, crabCfg, jsonFile, lumiSummary, version) values ($newId, '$dataset', now(), now(), '$user', '$format', '$location', '$fileList', $nFiles, $size, '$status', '$comment', '$pset', '$crabCfg', '$jsonFile', '$lumiSummary', '$release')";
  }
  elsif ($id > 0)
  {
    # Existing entry: only overwrite the columns for which a value was given.
    my @assignments;
    push (@assignments, "dataset='$dataset'") if $dataset;
    push (@assignments, "lastUpdateTime=now()");
    push (@assignments, "format='$format'") if $format;
    push (@assignments, "location='$location'") if $location;
    push (@assignments, "fileList='$fileList'") if $fileList;
    push (@assignments, "nFiles=$nFiles") if $nFiles;
    push (@assignments, "sizeInGB=$size") if $size;
    push (@assignments, "status='$status'") if $status;
    push (@assignments, "comment='$comment'") if $comment;
    push (@assignments, "pset='$pset'") if $pset;
    push (@assignments, "crabCfg='$crabCfg'") if $crabCfg;
    push (@assignments, "jsonFile='$jsonFile'") if $jsonFile;
    push (@assignments, "lumiSummary='$lumiSummary'") if $lumiSummary;
    push (@assignments, "version='$release'") if $release;

    my $values = join (", ", @assignments);
    $query = "update ntuple set $values where id=$id";
  }
  # An id of exactly 0 selects neither branch; there is nothing to run.
  return if !defined $query;

  return $db->query ($query);
}
293
# Removes the row with the given id from the ntuple table.
# Does nothing for a negative id.
sub
dbDelete
{
  my ($id) = @_;

  if ($id < 0)
  {
    return;
  }

  $db->selectdb ("ntuple");
  my $statement = "delete from ntuple where id=$id";
  return $db->query ($statement);
}
304
# Prints the usage text for the given command and exits. When the command is
# missing or not one of the documented commands, the general help is printed.
sub
printHelp
{
  my $command = shift;

  # Program name with any leading directory components removed.
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  # Per-command help: the usage line (after the program name), the
  # descriptive text, and the rows of the option table.
  my %helpFor = (
    "create" => {
      usage => "-l DIRECTORY [OPTION]... create NAME",
      text => [
        "Creates an entry in the database for dataset NAME. The ntuples are assumed to\n",
        "be present on the Tier 3, with the status being set accordingly.\n",
      ],
      options => [
        [" -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs"],
        [" -c, --comment COMMENT", "comment for the database entry"],
        [" -f, --format FORMAT", "ntuple format (default: BEAN)"],
        [" -j, --jsonFile FILE", "JSON file used for this dataset"],
        [" -l, --location DIRECTORY", "directory containing the ntuples"],
        [" -p, --pyConfig FILE", "Python config used to produce ntuples"],
        [" -r, --release NAME", "ntuple release used to produce ntuples"],
        [" -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB"],
      ],
    },
    "update" => {
      usage => "[OPTION]... update NAME",
      text => [
        "Updates an existing database entry for dataset NAME.\n",
      ],
      options => [
        [" -b, --crabCfg FILE", "CRAB config used to submit ntuple jobs"],
        [" -c, --comment COMMENT", "comment for the database entry"],
        [" -f, --format FORMAT", "ntuple format"],
        [" -j, --jsonFile FILE", "JSON file used for this dataset"],
        [" -l, --location DIRECTORY", "directory containing the ntuples"],
        [" -p, --pyConfig FILE", "Python config used to produce ntuples"],
        [" -r, --release NAME", "ntuple release used to produce ntuples"],
        [" -s, --lumiSummary FILE", "lumiSummary.json reported by CRAB"],
      ],
    },
    "finish" => {
      usage => "-l DIRECTORY finish NAME",
      text => [
        "Finalizes the database entry for dataset NAME, changing its status to\n",
        "\"present\". This is intended to be the final step in command-line based ntuple\n",
        "production.\n",
      ],
      options => [
        [" -l, --location DIRECTORY", "directory containing the ntuples"],
      ],
    },
    "deleteEntry" => {
      usage => "deleteEntry NAME",
      text => [
        "Deletes the database entry for dataset NAME. This is intended primarily for\n",
        "mistaken database entries. If you wish to actually delete a set of ntuples,\n",
        "please use the \"deprecate\" command instead.\n",
      ],
      options => [],
    },
    "deprecate" => {
      usage => "deprecate NAME",
      text => [
        "Marks the dataset NAME for deletion. WARNING: The dataset will be deleted from\n",
        "the Tier 3 within one week of this action. If you wish to simply delete a\n",
        "mistaken database entry, use the \"deleteEntry\" command instead.\n",
      ],
      options => [],
    },
    "uploadRelease" => {
      usage => "-p FILE -r DIRECTORY [OPTION]... uploadRelease NAME",
      text => [
        "Copies an ntuple release to the appropriate area on the Tier 3, and creates a\n",
        "database entry for it, with NAME being the name of the release. An ntuple\n",
        "release is a CMSSW release with all the necessary packages added for creating\n",
        "ntuples, along with a default Python config file.\n",
      ],
      options => [
        [" -c, --comment COMMENT", "comment for the database entry"],
        [" -f, --format FORMAT", "ntuple format (default: BEAN)"],
        [" -p, --pyConfig FILE", "default Python config for the release"],
        [" -r, --release DIRECTORY", "CMSSW release containing ntuple packages"],
      ],
    },
  );

  # General help, which additionally lists the available commands.
  my %general = (
    usage => "[OPTION]... COMMAND NAME",
    text => [
      "Manipulates entry in the OSU Tier 3 ntuple database given by NAME.\n",
    ],
    options => [
      [" -h, --help", "print help. If COMMAND is present, print help"],
      [" ", "specific to COMMAND."],
    ],
    commands => [
      [" create", "creates the entry"],
      [" update", "updates the entry"],
      [" finish", "finalizes the database entry"],
      [" deleteEntry", "removes the database entry"],
      [" deprecate", "marks the dataset for deletion"],
      [" uploadRelease", "upload an ntuple release"],
    ],
  );

  my $topic = \%general;
  $topic = $helpFor{$command} if defined $command && exists $helpFor{$command};

  print "Usage: $exeName $topic->{usage}\n";
  print @{ $topic->{text} };
  if (@{ $topic->{options} })
  {
    print "\n";
    print "Mandatory arguments to long options are mandatory for short options too.\n";
    foreach my $row (@{ $topic->{options} })
    {
      printf "%-29s%s\n", $row->[0], $row->[1];
    }
  }
  if ($topic->{commands})
  {
    print "\n";
    print "COMMAND may be one of the following:\n";
    foreach my $row (@{ $topic->{commands} })
    {
      printf "%-29s%s\n", $row->[0], $row->[1];
    }
  }

  exit;
}
403
# Returns the leading number printed by "du -s" for a path (the unit is
# whatever du reports by default), or 0 when du cannot be run or prints
# nothing usable. du is invoked without a shell, so special characters in
# the path are harmless.
sub
diskUsageOf
{
  my $path = shift;

  open (my $du, "-|", "du", "-s", "--", $path) or return 0;
  my $output = <$du>;
  close ($du);

  return 0 if !defined $output;
  # du separates the size from the path with a tab; take the leading digits.
  return $output =~ m/^(\d+)/ ? $1 : 0;
}

# Counts what a plain "ls" would list for a path: the non-hidden entries of
# a directory, or 1 for anything that is not a directory.
sub
countEntries
{
  my $path = shift;

  return 1 if !(-d $path);
  opendir (my $dir, $path) or return 0;
  my @entries = grep { !/^\./ } readdir ($dir);
  closedir ($dir);

  return scalar @entries;
}

# Determines the number of files and the total size of a dataset.
#
# Arguments: dataset name, directory containing the ntuples, and the
# newline-separated contents of a file list. When a file list is given, the
# listed files are measured. Otherwise the directory is measured, but only if
# the database does not already record a positive file count for the dataset.
#
# Returns (nFiles, size), where size is the du total divided by 1024*1024 and
# formatted with two decimals; both are 0 when nothing was measured. Exits
# when the directory does not exist.
sub
sizeOfDataset
{
  my $dataset = shift;
  my $location = shift;
  my $fileList = shift;

  # The dataset name is still unescaped at this point; escape it here.
  my $escapedDataset = addSlashes ($dataset);
  my $query = "select nFiles from ntuple where dataset='$escapedDataset'";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);
  my $previousNFiles = 0;
  if ($results && $results->numrows ())
  {
    my @row = $results->fetchrow ();
    $previousNFiles = $row[0];
  }

  my $size = 0.0;
  my $nFiles = 0;
  if ($location && !$fileList && $previousNFiles < 1)
  {
    if (!(-e $location))
    {
      print "$location does not exist!\n";
      exit;
    }
    $nFiles = countEntries ($location);
    $size = diskUsageOf ($location) / (1024 * 1024);
    $size = sprintf "%.2f", $size;
  }
  elsif ($fileList)
  {
    foreach my $file (split (/\n/, $fileList))
    {
      # A blank line would otherwise make du measure the current directory.
      next if $file =~ m/^\s*$/;
      $nFiles++;
      $size += diskUsageOf ($file);
    }
    $size /= 1024 * 1024;
    $size = sprintf "%.2f", $size;
  }

  return ($nFiles, $size);
}
448
# Publishes an ntuple release tarball and records it in the ntupleRelease
# table with pending set to 1.
#
# Arguments, in order: ntuple format, CMSSW release name, path of the tarball
# to move, Python config file name, release name, user, comment.
#
# Returns the result of the insert query. Exits when the Python config does
# not exist; dies when the config cannot be opened or the tarball cannot be
# moved into the release area.
sub
uploadRelease
{
  my $format = shift;
  my $cmsswRelease = shift;
  my $release = shift;
  my $psetName = shift;
  my $name = shift;
  my $user = shift;
  my $comment = shift;

  # Validate and read the config BEFORE moving the tarball, so a bad config
  # name does not leave a published tarball without a database entry.
  if (!(-e $psetName))
  {
    print "$psetName does not exist!\n";
    exit;
  }
  open (my $pyConfig, "<", $psetName) or die "Failed to open $psetName: $!, stopped";
  my $pset = do { local $/; <$pyConfig> };
  close ($pyConfig);
  $pset = "" if !defined $pset;

  move ($release, "/home/hart/public_html/releases/$name.tar.gz") or die "Ntuple releases may only be uploaded on the Tier 3, stopped";

  # Allocate the next id after the current maximum.
  $db->selectdb ("ntuple");
  my $maxIdResults = $db->query ("select max(id) from ntupleRelease");
  my @row = $maxIdResults->fetchrow ();
  my $id = 1;
  $id = $row[0] + 1 if $maxIdResults->numrows ();

  $name = addSlashes ($name);
  $pset = addSlashes ($pset);
  $user = addSlashes ($user);
  $format = addSlashes ($format);
  $cmsswRelease = addSlashes ($cmsswRelease);
  $comment = addSlashes ($comment);

  my $query = "insert into ntupleRelease (id, name, pset, user, pending, format, cmsswRelease, comment) values ($id, '$name', '$pset', '$user', 1, '$format', '$cmsswRelease', '$comment')";
  $db->selectdb ("ntuple");

  return $db->query ($query);
}