2 # Search for bad words in strings
4 # Copyright (c) 2000 Dirk Koopman
20 use vars qw($badword @regex);
22 my $oldfn = "$main::data/badwords";
23 my $regex = "$main::data/badw_regex";
24 my $bwfn = "$main::data/badword";
26 # copy issue ones across
27 filecopy("$regex.issue", $regex) unless -e $regex;
28 filecopy("$bwfn.issue", $bwfn) unless -e $bwfn;
30 $badword = new DXHash "badword";
32 use vars qw($VERSION $BRANCH);
33 $VERSION = sprintf( "%d.%03d", q$Revision$ =~ /(\d+)\.(\d+)/ );
34 $BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/ ) || 0;
35 $main::build += $VERSION;
36 $main::branch += $BRANCH;
38 # load the badwords file
42 my $fh = new IO::File $oldfn;
57 push @out, create_regex();
66 my $fh = new IO::File $regex;
74 # create a closure for each word so that it matches stuff with spaces/punctuation
75 # and repeated characters in it
77 my @l = map { $_ eq 'I' ? '[I1]' : ($_ eq 'O' ? '[O0]' : $_) }split //, $w;
78 my $e = join '+[\s\W]+', @l;
79 my $s = eval qq{sub { return \$_[0] =~ /$e+/ ? '$w' : () } };
80 push @regex, $s unless $@;
81 dbg("create_regex: $@") if $@;
86 my $l = "can't open $regex $!";
94 # check the text against the badwords list
106 for (split(/\s+/, $s)) {
108 push @out, $_ if $badword->in($_);