Only add an image hash to the learning list when the OCR scan of that image
actually matched at least one word; previously every not-yet-known hash was
queued for storage.

NOTE(review): the line structure of this context diff was reconstructed from a
copy whose newlines and indentation had been collapsed. The indentation below
is a best guess from brace nesting -- apply with `patch -l` and verify the
result against the target FuzzyOcr.pm.

*** FuzzyOcr.pm.orig	2006-09-06 16:50:49.000000000 -0700
--- FuzzyOcr.pm	2006-09-06 21:08:49.000000000 -0700
***************
*** 764,769 ****
                      }
                  }
-                 debuglog("Hash not yet known to the database, saving for later db storage...");
-                 push(@hashes, $digest);
              } else {
                  debuglog("Image hashing disabled in configuration, skipping...");
--- 764,767 ----
***************
*** 788,791 ****
--- 786,791 ----
                  push( @used_scansets, $scanset );
              }
+ 
+             my $mcnt = 0;
              foreach my $w (@words) {
                  my $wthreshold;
***************
*** 819,826 ****
--- 819,835 ----
                      }
                  $cnt += $wcnt;
+                 $mcnt += $wcnt;
                  if ( ( $verbose > 0 ) and ($wcnt) ) {
                      push( @found, "\"$w\" in $wcnt lines" );
                  }
              }
+             if ($mcnt > 0) {
+                 debuglog("Image contains $mcnt word matches, looks like spam.");
+                 if ($enable_image_hashing && $hashing_learn_scanned) {
+                     push(@hashes, $digest);
+                 }
+             } else {
+                 debuglog("Image contains no word matches, and will be ignored.");
+             }
          }
      }
***************
*** 828,832 ****
          my $score = ( $base_score + ( $cnt - $countreq ) * $add_score );
          if($enable_image_hashing and $hashing_learn_scanned) {
!             debuglog("Message is spam (score $score), storing all image hashes in database...");
              foreach (@hashes) {
                  add_image_hash_db($_, $score);
--- 837,841 ----
          my $score = ( $base_score + ( $cnt - $countreq ) * $add_score );
          if($enable_image_hashing and $hashing_learn_scanned) {
!             debuglog("Message is spam (score $score), adding spam images to the database...");
              foreach (@hashes) {
                  add_image_hash_db($_, $score);