3 # an attempt at producing a general purpose 'bot' for going and getting
4 # things orf the web and presenting them to user in a form they want
6 # This program uses LWP::Parallel::UserAgent to do its business
8 # each sub bot has the same structure and calling interface, but the actual
9 # input and output data formats are completely arbitrary
11 # Copyright (c) 1999 - Dirk Koopman, Tobit Computer Co Ltd
24 require LWP::Parallel::UserAgent;
34 use vars qw($version);
40 $SIG{INT} = $SIG{TERM} = 'IGNORE';
48 return 'Continue' if /100/;
49 return 'Switching protocols' if /101/;
52 return 'Created' if /201/;
53 return 'Accepted' if /202/;
54 return 'Non Authoritive' if /203/;
55 return 'No Content' if /204/;
56 return 'Reset Content' if /205/;
57 return 'Partial Content' if /206/;
59 return 'Multiple Choices' if /300/;
60 return 'Moved Permanently' if /301/;
61 return 'Found, redirect' if /302/;
62 return 'See Other' if /303/;
63 return 'Not modified' if /304/;
64 return 'Use proxy' if /305/;
66 return 'Bad request' if /400/;
67 return 'Unauthorized' if /401/;
68 return 'Payment required' if /402/;
69 return 'Forbidden' if /403/;
70 return 'Not Found' if /404/;
71 return 'Method not allowed' if /405/;
72 return 'Not acceptable' if /406/;
73 return 'Proxy authentication required' if /407/;
74 return 'Request timeout' if /408/;
75 return 'Conflict' if /409/;
76 return 'Gone' if /410/;
77 return 'Length required' if /411/;
78 return 'Precondition failed' if /412/;
79 return 'Request entity too large' if /413/;
80 return 'Request-URI too long' if /414/;
81 return 'Unsupported media type' if /415/;
82 return 'Requested range not satifiable' if /416/;
83 return 'Expectation failed' if /417/;
85 return 'Internal server error' if /500/;
86 return 'Not implemented' if /501/;
87 return 'Bad gateway' if /502/;
88 return 'Service unavailable' if /503/;
89 return 'Gateway timeout' if /504/;
90 return 'HTTP version not supported' if /505/;
99 $s =~ s/\b(?:THE|\&|A|AND|OR|NOT)\b//gi;
100 $s =~ s/(?:\(|\))//g;
101 return join('|', split(/\s+/, $s));
104 # qrz specific routines
107 my ($ua, $call, $title) = @_;
108 my $sreq = "http://www.qrz.com/callsign.html?callsign=$call";
110 my $req = HTTP::Request->new('GET', $sreq);
111 return $ua->register($req);
116 my ($fh, $call, $title, $code, $content) = @_;
118 print $fh "QRZ|$code|", trancode($code), "\n";
123 my $r = new QRZ $call;
127 my $l = length $content;
128 for ($i = 0; $i < $l && ($chunk = substr($content, $i, 512)); $i += 512) {
133 my @lines = $r->answer;
135 print $fh "QRZ|$code|$_\n" if $_;
137 print "lines: ", scalar @lines, "\n";
140 # k4ute specific routines
143 my ($ua, $call, $title) = @_;
144 my $sreq = "http://no4j.com/nfdxa/qsl/index.asp?dx=$call";
146 my $req = HTTP::Request->new('GET', $sreq);
147 return $ua->register($req);
152 my ($fh, $call, $title, $code, $content) = @_;
154 print $fh "UTE|$code|", trancode($code), "\n";
159 my $r = new K4UTE $call;
163 my $l = length $content;
164 for ($i = 0; $i < $l && ($chunk = substr($content, $i, 512)); $i += 512) {
169 my @lines = $r->answer;
171 print $fh "UTE|$code|$_\n" if $_;
173 print "lines: ", scalar @lines, "\n";
176 # buckmaster specific routines
179 my ($ua, $call, $title) = @_;
180 my $sreq = "http://www.buck.com/cgi-bin/do_hamcallexe";
182 my $req = HTTP::Request->new('POST', $sreq);
183 $req->add_content("entry=$call");
184 return $ua->register($req);
189 my ($fh, $call, $title, $code, $content) = @_;
191 print $fh "BCK|$code|", trancode($code), "\n";
196 my $r = new Buck $call;
200 my $l = length $content;
201 for ($i = 0; $i < $l && ($chunk = substr($content, $i, 512)); $i += 512) {
206 my @lines = $r->answer;
208 print $fh "BCK|$code|$_\n" if $_;
210 print "lines: ", scalar @lines, "\n";
214 # this is what is called when an incoming request is taken
221 if (defined ($line = <$fh>)) {
222 $line =~ s/[\r\n]+$//g;
228 $line =~ s/^[^[A-Za-z0-9\|]]+//g;
230 my ($call, $title) = split /\|/, $line;
231 return if $call eq 'quit' || $call eq 'QUIT';
233 print "{A = '$call'";
234 print $title ? ", T = '$title'}\n" : "}\n";
236 my $ua = LWP::Parallel::UserAgent->new;
238 # set up various UA things
239 $ua->duplicates(0); # ignore duplicates
241 $ua->redirect(1); # follow 302 redirects
242 $ua->agent("DXSpider callbot $version");
245 my $art = uri_escape($call);
246 my $tit = uri_escape($title);
249 if ($res = req_qrz($ua, $art, $tit)) {
250 print $fh "QRZ|500\n";
253 if ($res = req_buck($ua, $art, $tit)) {
254 print $fh "BCK|500\n";
257 if ($res = req_ute($ua, $art, $tit)) {
258 print $fh "UTE|500\n";
261 # wait for all the results to come back
262 my $entries = $ua->wait();
264 for (keys %$entries) {
265 $res = $entries->{$_}->response;
266 my $uri = $res->request->url;
267 my $code = $res->code;
268 print "url: ", $uri, " code: ", $code, "\n";
270 # now parse each result
272 parse_qrz($fh, $call, $title, $code, $res->content), last if /www.qrz.com/i;
273 parse_buck($fh, $call, $title, $code, $res->content), last if /www.buck.com/i;
274 parse_ute($fh, $call, $title, $code, $res->content), last if /no4j.com/i;
281 $SIG{QUIT} = \&cease;
282 $SIG{HUP} = 'IGNORE';
283 STDOUT->autoflush(1);
285 my $server = new ForkingServer \&child;
287 $server->allow('.*');