ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/JOBROBOT/TaskProfile
Revision: 1.3
Committed: Thu Oct 27 16:44:21 2005 UTC (19 years, 6 months ago) by lat
Branch: MAIN
CVS Tags: PHEDEX_V2_2_20051102
Changes since 1.2: +3 -4 lines
Log Message:
Support multiple task repositories.

File Contents

# User Rev Content
1 lat 1.1 #!/usr/bin/env perl
2    
3     ##H This drop box agent reports status of submitted jobs by maintaining
4     ##H a simple CSV file. It maintains the counts by site, job type, the
5     ##H owner/dataset, job submission status, and job exit code.
6     ##H
7     ##H Usage:
8     ##H TaskProfile
9     ##H -state DIRECTORY [-next NEXT] [-wait SECS]
10     ##H -taskrepo DIRECTORY
11     ##H
12     ##H -state agent state directory, including inbox
13     ##H -next next agent to pass the drops to; can be given several times
14     ##H -wait time to wait in seconds between work scans
15     ##H -taskrepo directory with all the tasks in it; can be given several times
16    
# Compile-time bootstrap: derive the program name and the directory this
# script lives in from $0, then put the toolkit library directory (relative
# to the script directory) at the front of @INC so that modules loaded
# afterwards are searched for there first.
BEGIN {
    use strict; use warnings; $^W = 1;

    # Program name without any leading directory components.
    our $me = $0; $me =~ s|.*/||;

    # Directory containing this script.  If $0 has no directory part at all
    # (e.g. the script was started as "perl TaskProfile"), the script is in
    # the current directory; simply stripping "/name" would leave the file
    # name itself behind and produce a bogus library path.
    our $home = ($0 =~ m|^(.*)/[^/]+$|s) ? $1 : ".";
    $home ||= ".";
    $home .= "/../../Toolkit/Common";
    unshift(@INC, $home);
}
23    
24     ######################################################################
25     use UtilsHelp;
# Parse the command line into the agent's constructor arguments, then
# create the agent and run it.
#
# strict/warnings are enabled here explicitly: the "use strict" inside the
# BEGIN block is lexically scoped to that block and does not cover this code.
use strict; use warnings;

# Constructor arguments collected from the command line:
#   DROPDIR   agent state directory (-state)
#   NEXTDIR   list of next agents (-next, repeatable)
#   WAITTIME  seconds between work scans (-wait)
#   TASKREPO  list of task repository directories (-taskrepo, repeatable)
my %args;
while (scalar @ARGV)
{
    if ($ARGV[0] eq '-state' && scalar @ARGV > 1)
    { shift (@ARGV); $args{DROPDIR} = shift(@ARGV); }
    elsif ($ARGV[0] eq '-next' && scalar @ARGV > 1)
    { shift (@ARGV); push (@{$args{NEXTDIR}}, shift(@ARGV)); }
    elsif ($ARGV[0] eq '-wait' && scalar @ARGV > 1)
    { shift (@ARGV); $args{WAITTIME} = shift(@ARGV); }
    elsif ($ARGV[0] eq '-taskrepo' && scalar @ARGV > 1)
    { shift (@ARGV); push (@{$args{TASKREPO}}, shift(@ARGV)); }
    elsif ($ARGV[0] eq '-h')
    { &usage(); }
    else
    { last; }   # unrecognised or incomplete option: rejected below
}

# Anything left over in @ARGV was not understood; the state directory and
# at least one task repository are mandatory.
if (@ARGV || !$args{DROPDIR} || !$args{TASKREPO})
{
    die "Insufficient parameters, use -h for help.\n";
}

TaskProfile->new(%args)->process();
48    
49     ######################################################################
50     # Routines specific to this agent.
51     package TaskProfile; use strict; use warnings; use base 'UtilsAgent';
52     use UtilsCommand;
53     use UtilsLogging;
54     use UtilsTiming;
55    
# Constructor.  Builds the object with the base class constructor, passing
# the caller's arguments through unchanged, then stores this agent's own
# parameters on the object (a true value supplied by the caller wins over
# the default).
sub new
{
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self  = $class->SUPER::new(@_);

    # Parameters specific to this agent and their defaults.
    my %defaults = (TASKREPO => undef); # task base directory
    my %given    = (@_);
    foreach my $name (keys %defaults)
    {
        $self->{$name} = $given{$name} || $defaults{$name};
    }

    return bless $self, $class;
}
67    
# Count the jobs found in the task repositories.
#
# Every entry directly below each directory in $self->{TASKREPO} is taken
# to be a site directory, and every entry two levels below a site directory
# a task directory.  Task directories are expected to be named like
#
#   SC3.FNAL.ExSimHitStatistics.jm03b_qcd_20_30.jm_Hit245_2_g133.1
#
# from which the application type, owner and dataset are extracted;
# directories not matching that pattern are skipped.  Each line of the
# task's "crab_*/share/scripts.list" is one job, with a one-letter status
# in its second space-separated field; lines without such a field are
# skipped.  For jobs in state R, P, A, K or S the last status recorded in
# the task's JOB_STATUS_LOG.txt (looked up by the scheduler id on the same
# line number of "crab_*/log/scheduler_id.log") supplies the exit code and
# can refine the status.
#
# Returns a reference to a nested hash of job counts:
#   $result->{SITE}{APP}{OWNER}{DATASET}{STATUS NAME}{EXIT CODE} = COUNT
# where EXIT CODE is the empty string if none is known.
sub getSiteStatus
{
    my ($self) = @_;
    my $result = {};
    my %statusname = ('X' => 'Initial', 'C' => 'Created', 'N' => 'No Input',
                      'S' => 'Submitted', 'P' => 'Pre-retrieve', 'R' => 'Retrieved',
                      'A' => 'Aborted', 'K' => 'Cancelled');

    foreach my $site (map { glob("$_/*") } @{$self->{TASKREPO} || []})
    {
        my ($sitename) = ($site =~ m|.*/(.*)|);
        foreach my $taskdir (glob("$site/*/*"))
        {
            # Match components from name SC3.FNAL.ExSimHitStatistics.jm03b_qcd_20_30.jm_Hit245_2_g133.1
            my ($apptype, $owner, $dataset)
                = ($taskdir =~ m!.*/SC3\.[^.]+\.([^.]+)\.(\S+)\.([^.]+)\.\d+!);

            # Not a task directory: nothing sensible to file its jobs under.
            next if ! defined $dataset;

            # Without a CRAB working directory there is no job list.
            my $crabdir = (glob("$taskdir/crab_*"))[0];
            next if ! $crabdir;

            my @jobids  = split(/\n/, &input("$crabdir/log/scheduler_id.log") || '');
            my @jobstat = split(/\n/, &input("$crabdir/share/scripts.list") || '');
            next if ! @jobstat;

            # Only the part of the status log after the last "START <n>"
            # marker is considered; map scheduler id => status text.
            my $crabinfo = &input("$taskdir/JOB_STATUS_LOG.txt") || '';
            $crabinfo =~ s/.*\nSTART \d//s;
            my %crabstat = map { /^ JOB\s+\d+:\s+(\S+)\s+STATUS:\s+(.*)/ ? ($1 => $2) : () }
                           split(/\n/, $crabinfo);

            foreach my $i (0 .. $#jobstat)
            {
                # Determine job status according to CRAB and from log.
                my $status = (split(/ /, $jobstat[$i]))[1];
                next if ! defined $status;   # malformed line, no status field

                my ($statusname, $exit);
                if ($status =~ /^[XCN]$/)
                {
                    # Job has not been submitted: no exit code.
                    $statusname = $statusname{$status};
                    $exit = '';
                }
                elsif ($status =~ /^[RPAKS]$/)
                {
                    # Figure out what last job status said
                    my $last = $crabstat{$jobids[$i] || ''} || '';
                    my ($stat, $code) = ($last =~ /(\S+)(?:\s+EXIT_CODE:\s+(\d+))?/);
                    $exit = defined $code ? $code : '';

                    # If CRAB says it was aborted, trust it
                    $status = 'A' if (defined $stat && $stat eq 'Aborted');
                    $statusname = $statusname{$status};

                    # For submitted jobs report the scheduler's own status word.
                    $statusname = $stat if (defined $stat && $status eq 'S');
                }
                else
                {
                    # Unknown status letter: not counted.
                    next;
                }

                # Now record.
                $result->{$sitename}{$apptype}{$owner}{$dataset}{$statusname}{$exit}++;
            }
        }
    }

    return $result;
}
129    
# Periodic work of the agent: take a snapshot of the job counts and hand
# it to output() as CSV text for "$self->{DROPDIR}/jobstatus.csv", then nap
# for $self->{WAITTIME}.
#
# The CSV has a header line followed by one row per
# site/app/owner/dataset/status/exit code combination, each stamped with
# the time of this scan.  Rows are emitted in sorted key order so that the
# file layout is stable from one scan to the next.
#
# @pending is accepted as part of the calling convention but is not used.
sub idle
{
    my ($self, @pending) = @_;

    # Collect all counts in one pass so every row carries the same timestamp.
    my $stats = $self->getSiteStatus ();
    my $now = time();

    my $output = "Time,Site,App,Owner,Dataset,Status,Exitcode,Count\n";
    foreach my $site (sort keys %$stats)
    {
        my $apps = $stats->{$site};
        foreach my $app (sort keys %$apps)
        {
            my $owners = $apps->{$app};
            foreach my $owner (sort keys %$owners)
            {
                my $datasets = $owners->{$owner};
                foreach my $ds (sort keys %$datasets)
                {
                    my $statuses = $datasets->{$ds};
                    foreach my $status (sort keys %$statuses)
                    {
                        my $exits = $statuses->{$status};
                        foreach my $exit (sort keys %$exits)
                        {
                            my $val = $exits->{$exit};
                            $output .= "$now,$site,$app,$owner,$ds,$status,$exit,$val\n";
                        }
                    }
                }
            }
        }
    }

    &output ("$self->{DROPDIR}/jobstatus.csv", $output);

    $self->nap ($self->{WAITTIME});
}