]> scm.dxcluster.org Git - spider.git/blob - perl/BadWords.pm
added even more colouration in an attempt to make it as clear as possible
[spider.git] / perl / BadWords.pm
1 #
2 # Search for bad words in strings
3 #
4 # Copyright (c) 2000 Dirk Koopman
5 #
6 # $Id$
7 #
8
9 package BadWords;
10
11 use strict;
12
13 use DXUtil;
14 use DXVars;
15 use IO::File;
16
17 use vars qw(%badwords $fn);
18
19 $fn = "$main::data/badwords";
20 %badwords = ();
21
22 use vars qw($VERSION $BRANCH);
23 $VERSION = sprintf( "%d.%03d", q$Revision$ =~ /(\d+)\.(\d+)/ );
24 $BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/ ) || 0;
25 $main::build += $VERSION;
26 $main::branch += $BRANCH;
27
28 # load the badwords file
29 sub load
30 {
31         my @out;
32         return unless -e $fn;
33         my $fh = new IO::File $fn;
34         
35         if ($fh) {
36                 %badwords = ();
37                 while (<$fh>) {
38                         chomp;
39                         next if /^\s*\#/;
40                         my @list = split " ";
41                         for (@list) {
42                                 $badwords{lc $_}++;
43                         }
44                 }
45                 $fh->close;
46         } else {
47                 my $l = "can't open $fn $!";
48                 dbg('err', $l);
49                 push @out, $l;
50         }
51         return @out;
52 }
53
54 # check the text against the badwords list
55 sub check
56 {
57         return grep { $badwords{$_} } split(/\b/, lc shift);
58 }
59
60 1;