ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/crabclean
(Generate patch)

Comparing UserCode/OSUT3Analysis/DBTools/scripts/crabclean (file contents):
Revision 1.1 by ahart, Mon Aug 27 18:02:03 2012 UTC vs.
Revision 1.7 by ahart, Thu May 23 20:38:44 2013 UTC

# Line 2 | Line 2
2  
3   use strict;
4   use Getopt::Long;
5 + use Mysql;
6 + use Term::ANSIColor;
7 +
8 + sub printHelp;
9 + sub parseCrabDir;
10 +
11 + our $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";
12  
13   my %opt;
14   Getopt::Long::Configure ("bundling");
15 < GetOptions (\%opt, "force|f", "help|h");
15 > GetOptions (\%opt, "dumb|d", "force|f", "dataset|s", "help|h");
16 >
17 > printHelp () if $opt{"help"} || !$ARGV[0] || (!$opt{"dumb"} && !$opt{"dataset"} && !$ARGV[1]);
18 > my $crabDir = $ARGV[0];
19 > my $outputDir = $ARGV[0];
20 > my $dataset = $ARGV[0];
21 > $outputDir = $ARGV[1] if !$opt{"dumb"} && !$opt{"dataset"};
22 >
23 > if ($opt{"dataset"})
24 >  {
25 >    my $queryDataset = $dataset;
26 >    $queryDataset =~ s/\*/%/g;
27 >    $queryDataset =~ s/(.*)/%$1%/g;
28 >    my $query = "select workingDirectory,location,dataset,user,creationTime,status from ntuple where dataset like '$queryDataset' order by creationTime";
29 >    $db->selectdb ("ntuple");
30 >    my $results = $db->query ($query);
31 >    if ($results->numrows () == 1)
32 >      {
33 >        my @row = $results->fetchrow ();
34 >        $crabDir = "$row[0]/ntuple";
35 >        $outputDir = $row[1];
36 >      }
37 >    if ($results->numrows () == 0)
38 >      {
39 >        print "Database entry does not exist.\n";
40 >        exit;
41 >      }
42 >    if ($results->numrows () > 1)
43 >      {
44 >        my %workingDir;
45 >        my %location;
46 >        print "Found multiple database entries matching\n";
47 >        print "\"$dataset\":\n";
48 >        for (my $i = 1; $i <= $results->numrows (); $i++)
49 >          {
50 >            my @row = $results->fetchrow ();
51 >            $row[4] =~ s/([^ ]*) [^ ]*/$1/g;
52 >            $workingDir{"$i"} = $row[0];
53 >            $location{"$i"} = $row[1];
54 >            printf "(%2d) $row[2]\n", $i;
55 >            print "     (";
56 >            print color "green" if $row[5] eq "present";
57 >            print color "bold yellow" if $row[5] eq "submitted";
58 >            print color "bold red" if $row[5] eq "created" or $row[5] eq "cancelled" or $row[5] eq "deprecated";
59 >            print $row[5];
60 >            print color "reset";
61 >            print ") created by $row[3] on $row[4]\n";
62 >          }
63 >        print "\nWhich entry would you like to use?: ";
64 >        my $response = <STDIN>;
65 >        $response =~ s/[ \t\n]//g;
66 >        if (!(exists $workingDir{$response}))
67 >          {
68 >            print "Your selection \"$response\" was not a valid option! Quitting.\n";
69 >            exit;
70 >          }
71 >        $crabDir = "$workingDir{$response}/ntuple";
72 >        $outputDir = $location{$response};
73 >      }
74 >  }
75  
76 < printHelp () if $opt{"help"} || !$ARGV[0];
77 < opendir (CRAB_OUTPUT, "$ARGV[0]");
76 > if (!(-e $outputDir) || !(-d $outputDir))
77 >  {
78 >    print "Output directory $outputDir does not exist!\n";
79 >    exit;
80 >  }
81 > opendir (CRAB_OUTPUT, "$outputDir");
82   my @crabOutput = readdir (CRAB_OUTPUT);
83   closedir (CRAB_OUTPUT);
84   my %filesToKeep;
85 < foreach my $file (@crabOutput)
85 > if (!$opt{"dumb"})
86    {
87 <    next if ($file eq "." || $file eq "..");
18 <    next if !($file =~ m/^.*_[^_]*_[^_]*_[^_]*\.root$/);
19 <    my $jobNumber = $file;
20 <    my $submissionNumber = $file;
21 <    $jobNumber =~ s/^.*_([^_]*)_[^_]*_[^_]*\.root$/$1/;
22 <    $submissionNumber =~ s/^.*_[^_]*_([^_]*)_[^_]*\.root$/$1/;
23 <    if (!(defined $filesToKeep{$jobNumber})
24 <     || $filesToKeep{$jobNumber} < $submissionNumber)
87 >    if (!(-e  $crabDir) || !(-d $crabDir))
88        {
89 <        $filesToKeep{$jobNumber} = $submissionNumber;
89 >        print "CRAB directory $crabDir does not exist!\n";
90 >        exit;
91        }
92 +    parseCrabDir ($crabDir, \%filesToKeep);
93    }
94 + else
95 +  {
96 +    foreach my $file (@crabOutput)
97 +      {
98 +        next if ($file eq "." || $file eq "..");
99 +        next if !($file =~ m/^.*_[^_]*_[^_]*_[^_]*\.root$/);
100 +        my $jobNumber = $file;
101 +        my $submissionNumber = $file;
102 +        $jobNumber =~ s/^.*_([^_]*)_[^_]*_[^_]*\.root$/$1/;
103 +        $submissionNumber =~ s/^.*_[^_]*_([^_]*)_[^_]*\.root$/$1/;
104 +        if (!(defined $filesToKeep{$jobNumber})
105 +         || $filesToKeep{$jobNumber} < $submissionNumber)
106 +          {
107 +            $filesToKeep{$jobNumber} = $submissionNumber;
108 +          }
109 +      }
110 +  }
111 + opendir (CRAB_OUTPUT, "$outputDir");
112 + my @crabOutput = readdir (CRAB_OUTPUT);
113 + closedir (CRAB_OUTPUT);
114 + my $nFiles = @crabOutput - 2;
115 + print "Before cleaning: $nFiles files\n";
116   foreach my $file (@crabOutput)
117    {
118      next if ($file eq "." || $file eq "..");
# Line 34 | Line 121 | foreach my $file (@crabOutput)
121      my $submissionNumber = $file;
122      $jobNumber =~ s/^.*_([^_]*)_[^_]*_[^_]*\.root$/$1/;
123      $submissionNumber =~ s/^.*_[^_]*_([^_]*)_[^_]*\.root$/$1/;
124 <    if ($filesToKeep{$jobNumber} != $submissionNumber)
124 >    if (!(defined $filesToKeep{$jobNumber}))
125        {
126 <        system ("rm -f $ARGV[0]/$file") if $opt{"force"};
127 <        system ("rm -i $ARGV[0]/$file") if !$opt{"force"};
126 >        print "No information found for job $jobNumber!\n";
127 >      }
128 >    elsif (($opt{"dumb"} && $filesToKeep{$jobNumber} != $submissionNumber)
129 >        || (!$opt{"dumb"} && $filesToKeep{$jobNumber} ne $file))
130 >      {
131 >        system ("rm -f $outputDir/$file") if $opt{"force"};
132 >        system ("rm -i $outputDir/$file") if !$opt{"force"};
133        }
134    }
135 + opendir (CRAB_OUTPUT, "$outputDir");
136 + my @crabOutput = readdir (CRAB_OUTPUT);
137 + closedir (CRAB_OUTPUT);
138 + my $nFiles = @crabOutput - 2;
139 + print "After cleaning:  $nFiles files\n";
140  
141   sub
142   printHelp
# Line 47 | Line 144 | printHelp
144    my $exeName = $0;
145    $exeName =~ s/^.*\/([^\/]*)$/$1/;
146  
147 <  print "Usage: $exeName [OPTION]... DIRECTORIES\n";
148 <  print "Deletes excess EDM output caused by multiple CRAB submissions.\n";
147 >  print "Usage: $exeName [OPTION]... CRAB_DIR OUTPUT_DIR\n";
148 >  print "  or: $exeName -d [OPTION]... OUTPUT_DIR\n";
149 >  print "  or: $exeName -s [OPTION]... DATASET\n";
150 >  print "Deletes excess EDM output caused by multiple CRAB submissions using the results\n";
151 >  print "contained in the CRAB working directory provided.\n";
152    print "\n";
153    print "Mandatory arguments to long options are mandatory for short options too.\n";
154 +  printf "%-29s%s\n", "  -d, --dumb", "tries to use the filenames of the output instead of";
155 +  printf "%-29s%s\n", "            ", "the results from the CRAB working directory; useful";
156 +  printf "%-29s%s\n", "            ", "if the working directory has been lost";
157    printf "%-29s%s\n", "  -f, --force", "never prompt (default is to prompt before any";
158    printf "%-29s%s\n", "             ", "removal)";
159    printf "%-29s%s\n", "  -h, --help", "print this help message";
160 +  printf "%-29s%s\n", "  -s, --dataset", "tells the script to expect a dataset name instead";
161 +  printf "%-29s%s\n", "               ", "of a directory";
162  
163    exit;
164   }
165 +
166 + sub
167 + parseCrabDir
168 + {
169 +  my $crabDir = shift;
170 +  my $filesToKeep = shift;
171 +
172 +  opendir (CRAB_RESULTS, "$crabDir/res");
173 +  my @crabResults = readdir (CRAB_RESULTS);
174 +  closedir (CRAB_RESULTS);
175 +  for my $file (@crabResults)
176 +    {
177 +      next if !($file =~ m/^crab_fjr_[^\.]*\.xml$/);
178 +      my $jobNumber = $file;
179 +      $jobNumber =~ s/^crab_fjr_([^\.]*)\.xml$/$1/;
180 +      my @status = `grep 'ExitStatus' $crabDir/res/$file`;
181 +      my $wrapperStatus = $status[0];
182 +      $wrapperStatus =~ s/^.*ExitStatus="([^"]*)".*$/$1/;
183 +      $wrapperStatus =~ s/\n//g;
184 +      if ($wrapperStatus != 0)
185 +        {
186 +          print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n";
187 +          next;
188 +        }
189 +      my @surlForGrid = `grep -A 1 '<SurlForGrid>' $crabDir/res/$file`;
190 +      my $fileName = $surlForGrid[1];
191 +      $fileName =~ s/^.*\/([^\/]*)\n/$1/;
192 +      $filesToKeep->{$jobNumber} = $fileName;
193 +    }
194 + }

Diff Legend

- Removed lines
+ Added lines
< Changed lines (text from the older revision)
> Changed lines (text from the newer revision)