projects
/
spider.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
and also in sh/db0sdx.pl
[spider.git]
/
perl
/
BadWords.pm
diff --git
a/perl/BadWords.pm
b/perl/BadWords.pm
index db33d7a1c4ebaeb34127c79bb1f5c1463dc35463..05a41bcc01aa15f1740e9a6545ace2e65d172156 100644
(file)
--- a/
perl/BadWords.pm
+++ b/
perl/BadWords.pm
@@
-3,7
+3,7
@@
#
# Copyright (c) 2000 Dirk Koopman
#
#
# Copyright (c) 2000 Dirk Koopman
#
-#
$Id$
+#
#
package BadWords;
#
package BadWords;
@@
-17,24
+17,18
@@
use DXDebug;
use IO::File;
use IO::File;
-use vars qw($badword
@regex
);
+use vars qw($badword
$regexcode
);
my $oldfn = "$main::data/badwords";
my $regex = "$main::data/badw_regex";
my $bwfn = "$main::data/badword";
# copy issue ones across
my $oldfn = "$main::data/badwords";
my $regex = "$main::data/badw_regex";
my $bwfn = "$main::data/badword";
# copy issue ones across
-filecopy("$regex.issue", $regex) unless -e $regex;
+filecopy("$regex.
gb.
issue", $regex) unless -e $regex;
filecopy("$bwfn.issue", $bwfn) unless -e $bwfn;
$badword = new DXHash "badword";
filecopy("$bwfn.issue", $bwfn) unless -e $bwfn;
$badword = new DXHash "badword";
-use vars qw($VERSION $BRANCH);
-$VERSION = sprintf( "%d.%03d", q$Revision$ =~ /(\d+)\.(\d+)/ );
-$BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/ ) || 0;
-$main::build += $VERSION;
-$main::branch += $BRANCH;
-
# load the badwords file
sub load
{
# load the badwords file
sub load
{
@@
-61,11
+55,10
@@
sub load
sub create_regex
{
my @out;
sub create_regex
{
my @out;
- @regex = ();
-
my $fh = new IO::File $regex;
if ($fh) {
my $fh = new IO::File $regex;
if ($fh) {
+ my $s = "sub { my \$str = shift; my \@out; \n";
while (<$fh>) {
chomp;
next if /^\s*\#/;
while (<$fh>) {
chomp;
next if /^\s*\#/;
@@
-75,12
+68,18
@@
sub create_regex
# and repeated characters in it
my $w = uc $_;
my @l = split //, $w;
# and repeated characters in it
my $w = uc $_;
my @l = split //, $w;
- my $e = join '+[\s\W]+', @l;
- my $s = eval qq{sub { return \$_[0] =~ /$e+/ ? '$w' : () } };
- push @regex, $s unless $@;
- dbg("create_regex: $@") if $@;
+ my $e = join '+[\s\W]*', @l;
+ $s .= "push \@out, \$1 if \$str =~ /\\b($e)/;\n";
}
}
}
}
+ $s .= "return \@out;\n}";
+ $regexcode = eval $s;
+ dbg($s) if isdbg('badword');
+ if ($@) {
+ @out = ($@);
+ dbg($@);
+ return @out;
+ }
$fh->close;
} else {
my $l = "can't open $regex $!";
$fh->close;
} else {
my $l = "can't open $regex $!";
@@
-96,15
+95,12
@@
sub check
{
my $s = uc shift;
my @out;
{
my $s = uc shift;
my @out;
-
- for (@regex) {
- push @out, &$_($s);
- }
+
+ push @out, &$regexcode($s) if $regexcode;
return @out if @out;
return @out if @out;
- for (split(/\s+/, $s)) {
- s/\'?S$//;
+ for (split(/\b/, $s)) {
push @out, $_ if $badword->in($_);
}
push @out, $_ if $badword->in($_);
}