ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/crabclean
Revision: 1.5
Committed: Tue Mar 12 00:13:26 2013 UTC (12 years, 1 month ago) by ahart
Branch: MAIN
CVS Tags: V01-01-00, V01-00-01, V01-00-00, V00-01-00
Changes since 1.4: +23 -20 lines
Log Message:
Fixed a bug relating to when multiple database entries match the dataset name.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Getopt::Long;
5 use Mysql;
6
7 sub printHelp;
8 sub parseCrabDir;
9
# Handle to the Tier 3 ntuple database.  It stays a package global, but the
# connection is only opened further down, once the command line is known to
# ask for -s/--dataset: that is the only mode that queries the database, so
# --help and the directory-based modes must not die when the database server
# is unreachable.
our $db;

# Parse the command line; "bundling" lets short options be combined (-df).
my %opt;
Getopt::Long::Configure ("bundling");
my $optionsOk = GetOptions (\%opt, "dumb|d", "force|f", "dataset|s", "help|h");

# printHelp prints the usage text and exits.  It is shown when asked for, when
# option parsing failed (this script deletes files, so it must not carry on
# after a mistyped option), or when positional arguments are missing: every
# mode needs one, and the default mode needs CRAB_DIR and OUTPUT_DIR.
printHelp () if !$optionsOk || $opt{"help"} || !$ARGV[0] || (!$opt{"dumb"} && !$opt{"dataset"} && !$ARGV[1]);

# The first argument is interpreted according to the mode: CRAB working
# directory (default), output directory (-d) or dataset name (-s).
my $crabDir = $ARGV[0];
my $outputDir = $ARGV[0];
my $dataset = $ARGV[0];
$outputDir = $ARGV[1] if !$opt{"dumb"} && !$opt{"dataset"};

if ($opt{"dataset"})
  {
    $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";
  }
21
# In -s/--dataset mode the positional argument is a dataset name.  Look it up
# in the ntuple table and take the CRAB working directory and the output
# location from the matching entry, asking the user to choose when several
# entries match.
if ($opt{"dataset"})
  {
    # quote () escapes the value and supplies the surrounding quotes, so a
    # dataset name containing a quote character can no longer break out of the
    # SQL string.  The LIKE wildcards % and _ are left untouched.
    my $quotedDataset = $db->quote ($dataset);
    my $query = "select workingDirectory,location,dataset,user,creationTime from ntuple where dataset like $quotedDataset order by creationTime";
    $db->selectdb ("ntuple");
    my $results = $db->query ($query) or die "Database query failed, stopped";
    my $nRows = $results->numrows ();

    if ($nRows == 0)
      {
        print "Database entry does not exist.\n";
        exit;
      }
    elsif ($nRows == 1)
      {
        my @row = $results->fetchrow ();
        $crabDir = "$row[0]/ntuple";
        $outputDir = $row[1];
      }
    else
      {
        # Number the candidates from 1 and remember each one's directories
        # under that number so the answer can be looked up directly.
        my %workingDir;
        my %location;
        print "Found multiple database entries matching\n";
        print "\"$dataset\":\n";
        for my $i (1 .. $nRows)
          {
            my @row = $results->fetchrow ();
            $workingDir{"$i"} = $row[0];
            $location{"$i"} = $row[1];
            # Database text is passed as an argument, never as part of the
            # format, so a '%' in a dataset name is printed literally.
            printf "(%2d) %s\n", $i, $row[2];
            print " created by $row[3] on $row[4]\n";
          }
        print "\nWhich entry would you like to use?: ";
        my $response = <STDIN>;
        # End-of-file on STDIN yields undef; treat it as an empty answer.
        $response = "" if !defined $response;
        $response =~ s/[ \t\n]//g;
        if (!(exists $workingDir{$response}))
          {
            print "Your selection \"$response\" was not a valid option! Quitting.\n";
            exit;
          }
        $crabDir = "$workingDir{$response}/ntuple";
        $outputDir = $location{$response};
      }
  }
64
# Read the listing of the output directory and work out, per job, which output
# file is the one to keep.
if (!(-d $outputDir))
  {
    print "Output directory $outputDir does not exist!\n";
    exit;
  }
# An unreadable directory must not be mistaken for an empty one.
opendir (my $outputHandle, $outputDir)
  or die "Failed to read output directory $outputDir: $!, stopped";
my @crabOutput = readdir ($outputHandle);
closedir ($outputHandle);

# Default mode: job number => file name reported by the CRAB job reports.
# Dumb mode:    job number => highest submission number seen in a file name.
my %filesToKeep;
if (!$opt{"dumb"})
  {
    if (!(-d $crabDir))
      {
        print "CRAB directory $crabDir does not exist!\n";
        exit;
      }
    parseCrabDir ($crabDir, \%filesToKeep);
  }
else
  {
    foreach my $file (@crabOutput)
      {
        next if ($file eq "." || $file eq "..");
        # Only names of the form <anything>_<job>_<submission>_<tag>.root are
        # considered; the two captures are the job and submission fields.
        next if !($file =~ m/^.*_([^_]*)_([^_]*)_[^_]*\.root$/);
        my ($jobNumber, $submissionNumber) = ($1, $2);
        # Submission numbers are compared numerically.
        if (!(defined $filesToKeep{$jobNumber})
            || $filesToKeep{$jobNumber} < $submissionNumber)
          {
            $filesToKeep{$jobNumber} = $submissionNumber;
          }
      }
  }
# Walk the output directory again and remove every .root file that is not the
# one recorded in %filesToKeep for its job.
foreach my $file (@crabOutput)
  {
    next if ($file eq "." || $file eq "..");
    # Same file name layout as used when the table was built:
    # <anything>_<job>_<submission>_<tag>.root
    next if !($file =~ m/^.*_([^_]*)_([^_]*)_[^_]*\.root$/);
    my ($jobNumber, $submissionNumber) = ($1, $2);
    if (!(defined $filesToKeep{$jobNumber}))
      {
        # Nothing is known about this job, so its files are left alone.
        print "No information found for job $jobNumber!\n";
      }
    elsif (($opt{"dumb"} && $filesToKeep{$jobNumber} != $submissionNumber)
           || (!$opt{"dumb"} && $filesToKeep{$jobNumber} ne $file))
      {
        # List-form system () runs rm directly, without a shell, so spaces or
        # shell metacharacters in the path are passed through literally.  "--"
        # stops a path starting with '-' from being read as an option.
        # Without --force, rm -i asks before each removal.
        my $removeFlag = $opt{"force"} ? "-f" : "-i";
        system ("rm", $removeFlag, "--", "$outputDir/$file");
      }
  }
119
sub
printHelp
{
  # Print the usage summary and the option table to standard output, then
  # terminate the script.  Takes no arguments and never returns.

  # Reduce the invocation path in $0 to the bare script name.
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  print "Usage: $exeName [OPTION]... CRAB_DIR OUTPUT_DIR\n"
      . " or: $exeName -d [OPTION]... OUTPUT_DIR\n"
      . " or: $exeName -s [OPTION]... DATASET\n"
      . "Deletes excess EDM output caused by multiple CRAB submissions using the results\n"
      . "contained in the CRAB working directory provided.\n"
      . "\n"
      . "Mandatory arguments to long options are mandatory for short options too.\n";

  # One [option column, description column] pair per output line; a
  # continuation line carries only a blank in the option column.
  my @optionRows = (
    [" -d, --dumb",    "tries to use the filenames of the output instead of"],
    [" ",              "the results from the CRAB working directory; useful"],
    [" ",              "if the working directory has been lost"],
    [" -f, --force",   "never prompt (default is to prompt before any"],
    [" ",              "removal)"],
    [" -h, --help",    "print this help message"],
    [" -s, --dataset", "tells the script to expect a dataset name instead"],
    [" ",              "of a directory"],
  );
  foreach my $row (@optionRows)
    {
      printf "%-29s%s\n", $row->[0], $row->[1];
    }

  exit;
}
144
sub
parseCrabDir
{
  # Scan the job reports (res/crab_fjr_<job>.xml) of a CRAB working directory
  # and record, for every job that finished successfully, the name of the
  # output file it produced.
  #
  # Arguments:
  #   $crabDir     - path of the CRAB working directory
  #   $filesToKeep - hash reference filled in as job number => file name
  #
  # Jobs whose report is unreadable, has no numeric exit status, has a
  # non-zero exit status, or names no output file get no entry, so that the
  # caller does not remove anything on their behalf.
  my $crabDir = shift;
  my $filesToKeep = shift;

  my $resultsDir = "$crabDir/res";
  my $resultsHandle;
  if (!opendir ($resultsHandle, $resultsDir))
    {
      warn "Cannot read CRAB results directory $resultsDir: $!\n";
      return;
    }
  my @crabResults = readdir ($resultsHandle);
  closedir ($resultsHandle);

  for my $file (@crabResults)
    {
      next if !($file =~ m/^crab_fjr_([^\.]*)\.xml$/);
      my $jobNumber = $1;

      # The report is read directly instead of through an external grep, so
      # directory names containing spaces or shell metacharacters work.
      my $reportPath = "$resultsDir/$file";
      my $reportHandle;
      if (!open ($reportHandle, "<", $reportPath))
        {
          warn "Cannot read $reportPath: $!\n";
          next;
        }
      my @lines = <$reportHandle>;
      close ($reportHandle);

      # The first line mentioning ExitStatus is used as the job's status.
      my ($statusLine) = grep { index ($_, "ExitStatus") >= 0 } @lines;
      my $wrapperStatus;
      if (defined $statusLine && $statusLine =~ m/^.*ExitStatus="([^"]*)"/)
        {
          $wrapperStatus = $1;
        }
      # A missing or non-numeric status would compare equal to 0 numerically
      # and pass for success, so it is rejected explicitly.
      if (!defined $wrapperStatus || $wrapperStatus !~ m/^\s*-?\d+\s*$/)
        {
          print "Job $jobNumber has no readable exit status. Skipping.\n";
          next;
        }
      if ($wrapperStatus != 0)
        {
          print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n";
          next;
        }

      # The output location is taken from the line that follows the first
      # <SurlForGrid> tag.
      my $fileName;
      for my $i (0 .. $#lines - 1)
        {
          next if index ($lines[$i], "<SurlForGrid>") < 0;
          $fileName = $lines[$i + 1];
          last;
        }
      next if !defined $fileName;

      # Keep only the base name, without surrounding whitespace.  An empty
      # result is never stored: it would match no file at all and cause every
      # output file of the job to be removed.
      $fileName =~ s/^\s+//;
      $fileName =~ s/\s+$//;
      $fileName =~ s/^.*\///;
      next if $fileName eq "";

      $filesToKeep->{$jobNumber} = $fileName;
    }
}