ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/crabclean
(Generate patch)

Comparing UserCode/OSUT3Analysis/DBTools/scripts/crabclean (file contents):
Revision 1.1 by ahart, Mon Aug 27 18:02:03 2012 UTC vs.
Revision 1.4 by ahart, Sat Mar 9 11:58:55 2013 UTC

# Line 2 | Line 2
2  
3   use strict;
4   use Getopt::Long;
5 + use Mysql;
6 +
7 + sub printHelp;
8 + sub parseCrabDir;
9 +
10 + our $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";
11  
12   my %opt;
13   Getopt::Long::Configure ("bundling");
14 < GetOptions (\%opt, "force|f", "help|h");
14 > GetOptions (\%opt, "dumb|d", "force|f", "dataset|s", "help|h");
15 >
16 > printHelp () if $opt{"help"} || !$ARGV[0] || (!$opt{"dumb"} && !$opt{"dataset"} && !$ARGV[1]);
17 > my $crabDir = $ARGV[0];
18 > my $outputDir = $ARGV[0];
19 > my $dataset = $ARGV[0];
20 > $outputDir = $ARGV[1] if !$opt{"dumb"} && !$opt{"dataset"};
21  
22 < printHelp () if $opt{"help"} || !$ARGV[0];
23 < opendir (CRAB_OUTPUT, "$ARGV[0]");
22 > if ($opt{"dataset"})
23 >  {
24 >    my $query = "select workingDirectory,location,dataset,user,creationTime from ntuple where dataset like '$dataset' order by creationTime";
25 >    $db->selectdb ("ntuple");
26 >    my $results = $db->query ($query);
27 >    if ($results->numrows () == 1)
28 >      {
29 >        my @row = $results->fetchrow ();
30 >        $crabDir = "$row[0]/ntuple";
31 >        $outputDir = $row[1];
32 >      }
33 >    if ($results->numrows () == 0)
34 >      {
35 >        print "Database entry does not exist.\n";
36 >        exit;
37 >      }
38 >    my %workingDir;
39 >    my %location;
40 >    print "Found multiple database entries matching\n";
41 >    print "\"$dataset\":\n";
42 >    for (my $i = 1; $i <= $results->numrows (); $i++)
43 >      {
44 >        my @row = $results->fetchrow ();
45 >        $workingDir{"$i"} = $row[0];
46 >        $location{"$i"} = $row[1];
47 >        printf "(%2d) $row[2]\n", $i;
48 >        print "     created by $row[3] on $row[4]\n";
49 >      }
50 >    print "\nWhich entry would you like to use?: ";
51 >    my $response = <STDIN>;
52 >    $response =~ s/[ \t\n]//g;
53 >    if (!(exists $workingDir{$response}))
54 >      {
55 >        print "Your selection \"$response\" was not a valid option! Quitting.\n";
56 >        exit;
57 >      }
58 >    $crabDir = "$workingDir{$response}/ntuple";
59 >    $outputDir = $location{$response};
60 >  }
61 >
62 > if (!(-e $outputDir) || !(-d $outputDir))
63 >  {
64 >    print "Output directory $outputDir does not exist!\n";
65 >    exit;
66 >  }
67 > opendir (CRAB_OUTPUT, "$outputDir");
68   my @crabOutput = readdir (CRAB_OUTPUT);
69   closedir (CRAB_OUTPUT);
70   my %filesToKeep;
71 < foreach my $file (@crabOutput)
71 > if (!$opt{"dumb"})
72    {
73 <    next if ($file eq "." || $file eq "..");
18 <    next if !($file =~ m/^.*_[^_]*_[^_]*_[^_]*\.root$/);
19 <    my $jobNumber = $file;
20 <    my $submissionNumber = $file;
21 <    $jobNumber =~ s/^.*_([^_]*)_[^_]*_[^_]*\.root$/$1/;
22 <    $submissionNumber =~ s/^.*_[^_]*_([^_]*)_[^_]*\.root$/$1/;
23 <    if (!(defined $filesToKeep{$jobNumber})
24 <     || $filesToKeep{$jobNumber} < $submissionNumber)
73 >    if (!(-e  $crabDir) || !(-d $crabDir))
74        {
75 <        $filesToKeep{$jobNumber} = $submissionNumber;
75 >        print "CRAB directory $crabDir does not exist!\n";
76 >        exit;
77 >      }
78 >    parseCrabDir ($crabDir, \%filesToKeep);
79 >  }
80 > else
81 >  {
82 >    foreach my $file (@crabOutput)
83 >      {
84 >        next if ($file eq "." || $file eq "..");
85 >        next if !($file =~ m/^.*_[^_]*_[^_]*_[^_]*\.root$/);
86 >        my $jobNumber = $file;
87 >        my $submissionNumber = $file;
88 >        $jobNumber =~ s/^.*_([^_]*)_[^_]*_[^_]*\.root$/$1/;
89 >        $submissionNumber =~ s/^.*_[^_]*_([^_]*)_[^_]*\.root$/$1/;
90 >        if (!(defined $filesToKeep{$jobNumber})
91 >         || $filesToKeep{$jobNumber} < $submissionNumber)
92 >          {
93 >            $filesToKeep{$jobNumber} = $submissionNumber;
94 >          }
95        }
96    }
97   foreach my $file (@crabOutput)
# Line 34 | Line 102 | foreach my $file (@crabOutput)
102      my $submissionNumber = $file;
103      $jobNumber =~ s/^.*_([^_]*)_[^_]*_[^_]*\.root$/$1/;
104      $submissionNumber =~ s/^.*_[^_]*_([^_]*)_[^_]*\.root$/$1/;
105 <    if ($filesToKeep{$jobNumber} != $submissionNumber)
105 >    if (!(defined $filesToKeep{$jobNumber}))
106 >      {
107 >        print "No information found for job $jobNumber!\n";
108 >      }
109 >    elsif (($opt{"dumb"} && $filesToKeep{$jobNumber} != $submissionNumber)
110 >        || (!$opt{"dumb"} && $filesToKeep{$jobNumber} ne $file))
111        {
112 <        system ("rm -f $ARGV[0]/$file") if $opt{"force"};
113 <        system ("rm -i $ARGV[0]/$file") if !$opt{"force"};
112 >        system ("rm -f $outputDir/$file") if $opt{"force"};
113 >        system ("rm -i $outputDir/$file") if !$opt{"force"};
114        }
115    }
116  
# Line 47 | Line 120 | printHelp
120    my $exeName = $0;
121    $exeName =~ s/^.*\/([^\/]*)$/$1/;
122  
123 <  print "Usage: $exeName [OPTION]... DIRECTORIES\n";
124 <  print "Deletes excess EDM output caused by multiple CRAB submissions.\n";
123 >  print "Usage: $exeName [OPTION]... CRAB_DIR OUTPUT_DIR\n";
124 >  print "  or: $exeName -d [OPTION]... OUTPUT_DIR\n";
125 >  print "  or: $exeName -s [OPTION]... DATASET\n";
126 >  print "Deletes excess EDM output caused by multiple CRAB submissions using the results\n";
127 >  print "contained in the CRAB working directory provided.\n";
128    print "\n";
129    print "Mandatory arguments to long options are mandatory for short options too.\n";
130 +  printf "%-29s%s\n", "  -d, --dumb", "tries to use the filenames of the output instead of";
131 +  printf "%-29s%s\n", "            ", "the results from the CRAB working directory; useful";
132 +  printf "%-29s%s\n", "            ", "if the working directory has been lost";
133    printf "%-29s%s\n", "  -f, --force", "never prompt (default is to prompt before any";
134    printf "%-29s%s\n", "             ", "removal)";
135    printf "%-29s%s\n", "  -h, --help", "print this help message";
136 +  printf "%-29s%s\n", "  -s, --dataset", "tells the script to expect a dataset name instead";
137 +  printf "%-29s%s\n", "               ", "of a directory";
138  
139    exit;
140   }
141 +
# parseCrabDir (CRAB_DIR, FILES_TO_KEEP)
#
# Reads the CRAB job reports CRAB_DIR/res/crab_fjr_<job>.xml and records, in
# the hash referenced by FILES_TO_KEEP, the output file name of every job
# whose report carries an exit status of zero.  The hash is keyed by the
# <job> part of the report's file name.
#
#   CRAB_DIR       path of the CRAB working directory (the one holding "res")
#   FILES_TO_KEEP  reference to the hash that receives job => file name
#
# Returns nothing.  Jobs that failed, or whose report cannot be read or lacks
# a usable exit status or output file entry, are reported and left out of the
# hash, so the caller never sees a success it cannot back up with a report.
#
# The reports are read directly rather than through a shell "grep", so
# directory names containing spaces or shell metacharacters are handled.
sub
parseCrabDir
{
  my $crabDir = shift;
  my $filesToKeep = shift;

  my $resDir = "$crabDir/res";
  my $resHandle;
  if (!opendir ($resHandle, $resDir))
    {
      warn "Cannot read CRAB results directory $resDir: $!\n";
      return;
    }
  my @crabResults = readdir ($resHandle);
  closedir ($resHandle);

  for my $file (@crabResults)
    {
      next if !($file =~ m/^crab_fjr_([^\.]*)\.xml$/);
      my $jobNumber = $1;

      my $report;
      if (!open ($report, "<", "$resDir/$file"))
        {
          warn "Cannot read job report $resDir/$file: $!\n";
          next;
        }
      my @lines = <$report>;
      close ($report);

      # Only the first line mentioning ExitStatus is consulted.
      my ($statusLine) = grep { m/ExitStatus/ } @lines;
      my $wrapperStatus;
      if (defined $statusLine && $statusLine =~ m/^.*ExitStatus="([^"]*)"/)
        {
          $wrapperStatus = $1;
        }
      # A missing, empty or non-numeric status must not pass for success: a
      # plain numeric comparison would silently treat all of those as zero.
      if (!defined $wrapperStatus || !($wrapperStatus =~ m/^\s*-?\d+\s*$/))
        {
          print "Job $jobNumber has no readable exit status. Skipping.\n";
          next;
        }
      if ($wrapperStatus != 0)
        {
          print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n";
          next;
        }

      # The output location is taken from the line that follows the first
      # <SurlForGrid> tag (assumes the value sits on its own line).
      my $fileName;
      for my $i (0 .. $#lines - 1)
        {
          next if !($lines[$i] =~ m/<SurlForGrid>/);
          $fileName = $lines[$i + 1];
          last;
        }
      if (defined $fileName)
        {
          # Trim surrounding whitespace (including a final newline, which may
          # be absent on the last line of a report), then keep only the part
          # after the last slash.
          $fileName =~ s/^\s+|\s+$//g;
          $fileName =~ s/^.*\///;
        }
      if (!defined $fileName || $fileName eq "")
        {
          print "Job $jobNumber has no output file recorded. Skipping.\n";
          next;
        }
      $filesToKeep->{$jobNumber} = $fileName;
    }
  return;
}

Diff Legend

Removed lines
+ Added lines
< Changed lines (text as in Revision 1.1)
> Changed lines (text as in Revision 1.4)