7 |
|
# |
8 |
|
# Interface |
9 |
|
# --------- |
10 |
< |
# new() : A new ActiveDoc object |
10 |
> |
# new(cache,dbstore) : A new ActiveDoc object |
11 |
|
# url() : Return/set the docs url - essential |
12 |
|
# file() : Return the local filename of document |
13 |
+ |
# ProcessFile() : Return the filename of PreProcessed document |
14 |
|
# |
15 |
< |
# parse(parselabel): Parse the document file for the given parse level |
15 |
< |
# newparse(parselabel) : Create a new parse type |
16 |
< |
# addtag(parselabel,tagname,start,obj,text,obj,end,obj) |
17 |
< |
# : Add tags to the parse given by label |
18 |
< |
# newdoc(file) : Return an new object of the appropriate type |
15 |
> |
# parent() : return the object ref of the calling parent |
16 |
|
# getfile(url) : get a processedfile object given a url |
17 |
< |
# config([ActiveConfig]) : Set up/return Configuration for the document |
21 |
< |
# basequery([ActiveConfig]) : Set up/return UserQuery for the doc |
22 |
< |
# copydocconfig(ActiveDoc) : Copy the basic configuration from the ActiveDoc |
23 |
< |
# copydocquery(ActiveDoc) : Copy the basicquery from the ActiveDoc |
17 |
> |
# activatedoc(url) : Return the object ref for a doc described by the given url |
18 |
|
# |
19 |
|
# -- error methods -- |
20 |
|
# error(string) : Report a general error to the user |
21 |
|
# parseerror(string) : Report an error while parsing a file |
22 |
< |
# line() : Return the current line number of the document |
23 |
< |
# and the ProcessedFileObj it is in |
22 |
> |
# line([linenumber]) : Return the line number of the document |
23 |
> |
# and the ProcessedFileObj it is in corresponding to the |
24 |
> |
# supplied number of the expanded document |
25 |
> |
# If no number is supplied, the current parser's line number will be used |
26 |
|
|
27 |
|
package ActiveDoc::ActiveDoc; |
28 |
|
require 5.004; |
29 |
< |
use ActiveDoc::Parse; |
34 |
< |
use ActiveDoc::ActiveConfig; |
29 |
> |
use ActiveDoc::SimpleURLDoc; |
30 |
|
use ActiveDoc::PreProcessedFile; |
31 |
< |
use ObjectUtilities::ObjectBase; |
37 |
< |
use URL::URLhandler; |
31 |
> |
use Utilities::Verbose; |
32 |
|
|
33 |
< |
@ISA = qw(ObjectUtilities::ObjectBase); |
33 |
> |
@ISA = qw(ActiveDoc::SimpleURLDoc Utilities::Verbose); |
34 |
|
|
35 |
|
sub new { |
36 |
|
my $class=shift; |
37 |
< |
$self={}; |
37 |
> |
my $self={}; |
38 |
|
bless $self, $class; |
39 |
< |
$self->config(shift); |
40 |
< |
|
41 |
< |
# A URL handler per document |
42 |
< |
$self->{urlhandler}=URL::URLhandler->new($self->config()->cache()); |
49 |
< |
|
50 |
< |
$self->init(@_); |
39 |
> |
$self->cache(shift); |
40 |
> |
$self->{dbstore}=shift; |
41 |
> |
$self->_initdoc("doc",@_); |
42 |
> |
# $self->{switch}=ActiveDoc::SimpleURLDoc->new($self->{cache}); |
43 |
|
return $self; |
44 |
|
} |
45 |
|
|
54 |
– |
# ----- parse related routines -------------- |
55 |
– |
sub parse { |
56 |
– |
my $self=shift; |
57 |
– |
$parselabel=shift; |
58 |
– |
|
59 |
– |
my $file=$self->file(); |
60 |
– |
print "Parse called on file $file\n"; |
61 |
– |
if ( $file ) { |
62 |
– |
$self->{parsers}{$parselabel}->parse($file,@_); |
63 |
– |
} |
64 |
– |
else { |
65 |
– |
print "Cannot parse - file not known\n"; |
66 |
– |
} |
67 |
– |
} |
68 |
– |
|
69 |
– |
sub newparse { |
70 |
– |
my $self=shift; |
71 |
– |
my $parselabel=shift; |
72 |
– |
|
73 |
– |
$self->{parsers}{$parselabel}=ActiveDoc::Parse->new(); |
74 |
– |
$self->{parsers}{$parselabel}->addignoretags(); |
75 |
– |
$self->{parsers}{$parselabel}->addgrouptags(); |
76 |
– |
} |
77 |
– |
|
78 |
– |
sub addtag { |
79 |
– |
my $self=shift; |
80 |
– |
my $parselabel=shift; |
81 |
– |
if ( $#_ != 6 ) { |
82 |
– |
$self->error("Incorrect addtags specification\n". |
83 |
– |
"called with :\n@_ \n"); |
84 |
– |
} |
85 |
– |
$self->{parsers}{$parselabel}->addtag(@_); |
86 |
– |
} |
87 |
– |
|
88 |
– |
sub addurltags { |
89 |
– |
my $self=shift; |
90 |
– |
my $parselabel=shift; |
91 |
– |
|
92 |
– |
$self->{parsers}{$parselabel}-> |
93 |
– |
addtag("Base", \&Base_start, $self, "", $self, |
94 |
– |
\&Base_end, $self); |
95 |
– |
} |
96 |
– |
|
46 |
|
sub url { |
47 |
|
my $self=shift; |
48 |
< |
@_ ?$self->{File}=$self->getfile(shift) |
49 |
< |
: $self->{File}; |
50 |
< |
} |
51 |
< |
|
52 |
< |
sub copydocconfig { |
53 |
< |
my $self=shift; |
54 |
< |
my $ActiveDoc=shift; |
55 |
< |
|
56 |
< |
$self->config($ActiveDoc->config()); |
57 |
< |
|
58 |
< |
} |
110 |
< |
|
111 |
< |
sub copydocquery { |
112 |
< |
my $self=shift; |
113 |
< |
my $ActiveDoc=shift; |
114 |
< |
|
115 |
< |
$self->basequery($ActiveDoc->basequery()); |
116 |
< |
} |
117 |
< |
|
118 |
< |
sub config { |
119 |
< |
my $self=shift; |
120 |
< |
@_?$self->{ActiveConfig}=shift |
121 |
< |
: $self->{ActiveConfig}; |
122 |
< |
} |
123 |
< |
|
124 |
< |
sub basequery { |
125 |
< |
my $self=shift; |
126 |
< |
@_ ? $self->{UserQuery}=shift |
127 |
< |
: $self->{UserQuery}; |
48 |
> |
# get file & preprocess |
49 |
> |
if ( @_ ) { |
50 |
> |
$self->{origurl}=shift; |
51 |
> |
$self->{File}=$self->getfile($self->{origurl}); |
52 |
> |
$self->filetoparse($self->{File}->ProcessedFile()); |
53 |
> |
$self->verbose("url downloaded to ".$self->{File}->ProcessedFile()); |
54 |
> |
} |
55 |
> |
if ( defined $self->{File} ) { |
56 |
> |
return $self->{File}->url(); |
57 |
> |
} |
58 |
> |
else { return "undefined"; } |
59 |
|
} |
60 |
|
|
61 |
< |
sub getfile() { |
61 |
> |
sub getfile { |
62 |
|
my $self=shift; |
63 |
|
my $origurl=shift; |
64 |
|
|
65 |
|
my $fileref; |
66 |
< |
print "GETFILE called\n"; |
67 |
< |
my ($url, $file)=$self->{urlhandler}->get($origurl); |
66 |
> |
my ($url, $file); |
67 |
> |
if ( 0 ) { |
68 |
> |
$self->verbose("Forced download of $origurl"); |
69 |
> |
($url, $file)=$self->urldownload($origurl); |
70 |
> |
} |
71 |
> |
else { |
72 |
> |
$self->verbose("Attempting to get $origurl"); |
73 |
> |
($url, $file)=$self->urlget($origurl); |
74 |
> |
} |
75 |
|
# do we already have an appropriate object? |
76 |
< |
#my ($fileref)=$self->config()->find("__preprocessed",$url); |
139 |
< |
undef $fileref; |
76 |
> |
($fileref)=$self->{dbstore}->find($url); |
77 |
|
if ( defined $fileref ) { |
78 |
< |
print "found $url in database ----\n"; |
78 |
> |
$self->verbose("Found $url in database"); |
79 |
|
$fileref->update(); |
80 |
|
} |
81 |
|
else { |
82 |
|
if ( $file eq "" ) { |
83 |
|
$self->parseerror("Unable to get $origurl"); |
84 |
|
} |
85 |
< |
#-- set up a new preprocess file |
86 |
< |
print "Making a new file $url----\n"; |
87 |
< |
$fileref=ActiveDoc::PreProcessedFile->new($self->config()); |
85 |
> |
# -- set up a new preprocess file |
86 |
> |
$self->verbose("Making a new preprocessed file $url"); |
87 |
> |
$fileref=ActiveDoc::PreProcessedFile->new($self->{dbstore}); |
88 |
> |
$fileref->cache($self->{cache}); |
89 |
|
$fileref->url($url); |
90 |
|
$fileref->update(); |
153 |
– |
$self->config()->store($fileref,"__preprocessed",$url); |
91 |
|
} |
155 |
– |
print "---------- returning".$fileref."\n"; |
92 |
|
return $fileref; |
93 |
|
} |
94 |
|
|
95 |
< |
# -------- Error Handling and Error services -------------- |
95 |
> |
sub activatedoc { |
96 |
> |
my $self=shift; |
97 |
> |
my $url=shift; |
98 |
|
|
99 |
< |
sub error { |
100 |
< |
my $self=shift; |
101 |
< |
my $string=shift; |
99 |
> |
# first get a preprocessed copy of the file |
100 |
> |
my $fileobj=$self->getfile($url); |
101 |
> |
|
102 |
> |
# now parse it for the <Doc> tag |
103 |
> |
my $tempdoc=ActiveDoc::SimpleURLDoc->new($self->{cache}); |
104 |
> |
$tempdoc->filetoparse($fileobj->ProcessFile()); |
105 |
> |
my ($doctype,$docversion)=$tempdoc->doctype(); |
106 |
> |
undef $tempdoc; |
107 |
> |
|
108 |
> |
if ( ! defined $doctype ) { |
109 |
> |
$self->parseerror("No <Doc type=> Specified in ".$url); |
110 |
> |
} |
111 |
> |
$self->verbose("doctype required is $doctype $docversion"); |
112 |
|
|
113 |
< |
die $string."\n"; |
113 |
> |
# Set up a new object of the specified type |
114 |
> |
eval "require $doctype"; |
115 |
> |
die $@ if $@; |
116 |
> |
my $newobj=$doctype->new($self->{cache},$self->{dbstore}); |
117 |
> |
$newobj->url($url); |
118 |
> |
$newobj->parent($self); |
119 |
> |
return $newobj; |
120 |
|
} |
121 |
|
|
122 |
+ |
sub parent { |
123 |
+ |
my $self=shift; |
124 |
+ |
|
125 |
+ |
@_?$self->{parent}=shift |
126 |
+ |
:$self->{parent}; |
127 |
+ |
} |
128 |
+ |
|
129 |
+ |
# -------- Error Handling and Error services -------------- |
130 |
+ |
|
131 |
|
sub parseerror { |
132 |
|
my $self=shift; |
133 |
|
my $string=shift; |
134 |
|
|
135 |
< |
($line, $file)=$self->line(); |
136 |
< |
print "Parse Error in ".$file->url().", line ". |
135 |
> |
if ( $self->currentparsename() eq "" ) { |
136 |
> |
$self->error($string); |
137 |
> |
} |
138 |
> |
elsif ( ! defined $self->{File} ) { |
139 |
> |
print "Parse Error in ".$self->filenameref()." line " |
140 |
> |
.$self->{currentparser}->line()."\n"; |
141 |
> |
print $string."\n"; |
142 |
> |
} |
143 |
> |
else { |
144 |
> |
($line, $file)=$self->line(); |
145 |
> |
print "Parse Error in ".$file->url().", line ". |
146 |
|
$line."\n"; |
147 |
< |
print $string."\n"; |
148 |
< |
die; |
147 |
> |
print $string."\n"; |
148 |
> |
} |
149 |
> |
exit; |
150 |
|
} |
151 |
|
|
152 |
< |
sub checktag { |
153 |
< |
my $self=shift; |
154 |
< |
my $tagname=shift; |
182 |
< |
my $hashref=shift; |
183 |
< |
my $param=shift; |
152 |
> |
sub line { |
153 |
> |
my $self=shift; |
154 |
> |
my $parseline; |
155 |
|
|
156 |
< |
if ( ! exists $$hashref{$param} ) { |
157 |
< |
$self->parseerror("Incomplete Tag <$tagname> : $param required"); |
158 |
< |
} |
159 |
< |
} |
156 |
> |
if ( @_ ) { |
157 |
> |
$parseline=shift; |
158 |
> |
} |
159 |
> |
else { |
160 |
> |
$parseline=$self->{currentparser}->line(); |
161 |
> |
} |
162 |
|
|
190 |
– |
sub line { |
191 |
– |
$self=shift; |
163 |
|
my ($line, $fileobj)= |
164 |
< |
$self->{Processedfile}->line($self->{switch}->line()); |
164 |
> |
$self->{File}->realline($parseline); |
165 |
|
return ($line, $fileobj); |
166 |
|
} |
167 |
|
|
168 |
+ |
sub tagstartline { |
169 |
+ |
my $self=shift; |
170 |
+ |
my ($line, $fileobj)=$self->{File}->line( |
171 |
+ |
$self->{currentparser}->tagstartline()); |
172 |
+ |
return ($line, $fileobj); |
173 |
+ |
} |
174 |
+ |
|
175 |
|
sub file { |
176 |
|
my $self=shift; |
177 |
|
|
178 |
< |
$self->{PPf}->file(); |
178 |
> |
$self->{File}->file(); |
179 |
|
} |
180 |
|
|
181 |
< |
# --------------- Initialisation Methods --------------------------- |
204 |
< |
|
205 |
< |
sub preprocess_init { |
181 |
> |
sub ProcessFile { |
182 |
|
my $self=shift; |
207 |
– |
$self->{PPfile}=PreProcessedFile->new($self->config()); |
208 |
– |
} |
183 |
|
|
184 |
< |
sub init { |
211 |
< |
# Dummy Routine - override for derived classes |
184 |
> |
return $self->{File}->ProcessedFile(); |
185 |
|
} |
186 |
|
|
214 |
– |
# ------------------- Tag Routines ----------------------------------- |
187 |
|
# |
188 |
< |
# Base - for setting url bases |
188 |
> |
# Delegate all else to the switch |
189 |
|
# |
190 |
< |
sub Base_start { |
191 |
< |
my $self=shift; |
220 |
< |
my $name=shift; |
221 |
< |
my $hashref=shift; |
190 |
> |
#sub AUTOLOAD { |
191 |
> |
# my $self=shift; |
192 |
|
|
193 |
< |
$self->checktag($name, $hashref, 'type' ); |
194 |
< |
$self->checktag($name, $hashref, 'base' ); |
225 |
< |
|
226 |
< |
# Keep track of base tags |
227 |
< |
push @{$self->{basestack}}, $$hashref{"type"}; |
228 |
< |
# Set the base |
229 |
< |
$self->{urlhandler}->setbase($$hashref{"type"},$hashref); |
193 |
> |
# don't propagate destroy methods |
194 |
> |
# return if $AUTOLOAD=~/::DESTROY/; |
195 |
|
|
196 |
< |
} |
196 |
> |
# remove this package name |
197 |
> |
# ($name=$AUTOLOAD)=~s/ActiveDoc::ActiveDoc:://; |
198 |
|
|
199 |
< |
sub Base_end { |
200 |
< |
my $self=shift; |
201 |
< |
my $name=shift; |
236 |
< |
my $type; |
199 |
> |
# pass the message to SimpleDoc |
200 |
> |
# $self->{switch}->$name(@_); |
201 |
> |
#} |
202 |
|
|
203 |
< |
if ( $#{$self->{basestack}} == -1 ) { |
204 |
< |
print "Parse Error : unmatched </".$name."> on line ". |
205 |
< |
$self->line()."\n"; |
206 |
< |
die; |
207 |
< |
} |
208 |
< |
else { |
209 |
< |
$type = pop @{$self->{basestack}}; |
210 |
< |
$self->{urlhandler}->unsetbase($type); |
211 |
< |
} |
203 |
> |
|
204 |
> |
# ------------------- Tag Routines ----------------------------------- |
205 |
> |
sub Doc_Start { |
206 |
> |
my $self=shift; |
207 |
> |
my $name=shift; |
208 |
> |
my $hashref=shift; |
209 |
> |
|
210 |
> |
$self->checktag($name, $hashref, "type"); |
211 |
> |
$self->{doctypefound}++; |
212 |
> |
if ( $self->{doctypefound} == 1 ) { # only take first doctype |
213 |
> |
$self->{docobject}=$$hashref{'type'}; |
214 |
> |
} |
215 |
|
} |