projects
/
spider.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
add CTY-2614
[spider.git]
/
perl
/
AnnTalk.pm
diff --git a/perl/AnnTalk.pm b/perl/AnnTalk.pm
index 0090eb04824581d567903654a87fb90a87b005bb..6d47726420f9cab730555853303429bd93367c44 100644
(file)
--- a/perl/AnnTalk.pm
+++ b/perl/AnnTalk.pm
@@ -3,7 +3,7 @@
#
# Copyright (c) 2000 Dirk Koopman
#
#
# Copyright (c) 2000 Dirk Koopman
#
-# $Id$
+#
#
package AnnTalk;
#
package AnnTalk;
@@ -17,8 +17,8 @@ use DXVars;
use vars qw(%dup $duplth $dupage $filterdef);
use vars qw(%dup $duplth $dupage $filterdef);
-$duplth = 60; # the length of text to use in the deduping
-$dupage = 5*24*3600; # the length of time to hold spot dups
+$duplth = 30; # the length of text to use in the deduping
+$dupage = 18*3600; # the length of time to hold ann dups
$filterdef = bless ([
# tag, sort, field, priv, special parser
['by', 'c', 0],
$filterdef = bless ([
# tag, sort, field, priv, special parser
['by', 'c', 0],
@@ -47,11 +47,10 @@ sub dup
chomp $text;
unpad($text);
$text =~ s/\%([0-9A-F][0-9A-F])/chr(hex($1))/eg;
chomp $text;
unpad($text);
$text =~ s/\%([0-9A-F][0-9A-F])/chr(hex($1))/eg;
- $text = Encode::encode("iso-8859-1", $text) if $main::can_encode && Encode::is_utf8($text, 1);
- $text = pack("C*", map {$_ & 127} unpack("C*", $text));
+# $text = Encode::encode("iso-8859-1", $text) if $main::can_encode && Encode::is_utf8($text, 1);
$text =~ s/[^\#a-zA-Z0-9]//g;
$text = substr($text, 0, $duplth) if length $text > $duplth;
$text =~ s/[^\#a-zA-Z0-9]//g;
$text = substr($text, 0, $duplth) if length $text > $duplth;
- my $dupkey = "A$to|\L$text";
+ my $dupkey = "A$call|$to|\L$text";
return DXDupe::check($dupkey, $t);
}
return DXDupe::check($dupkey, $t);
}