# # SpamAssassin SNF4SA Plugin for SNFServer. # # This plugin implements a SpamAssassin rule to use SNFServer to test # whether an email is spam. # # Copyright (C) 2009 ARM Research Labs, LLC. # # snf4sa.pm # # The plugin implements a single evaluation rule, which passes the # email message through SNFServer. The communication with SNFServer # is through XCI and a temporary file on disk which contains the email # message truncated to the frist 64K bytes. # package Snf4sa; use strict; use Mail::SpamAssassin; use Mail::SpamAssassin::Plugin; use Mail::SpamAssassin::PerMsgStatus; use Mail::SpamAssassin::Logger; use IO::Socket; use IO::File; use File::Temp qw/ tempfile tempdir /; our @ISA = qw(Mail::SpamAssassin::Plugin); # Convenience variables and pseudo-constants my $CRLF = "\x0d\x0a"; # translation table for SNF rule codes my $rule_code_xlat = { 0 => 'Standard White Rules', 20 => 'GBUdb Truncate (superblack)', 40 => 'GBUdb Caution (suspicious)', 47 => 'Travel', 48 => 'Insurance', 49 => 'Antivirus Push', 50 => 'Media Theft', 51 => 'Spamware', 52 => 'Snake Oil', 53 => 'Scam Patterns', 54 => 'Porn/Adult', 55 => 'Malware & Scumware Greetings', 56 => 'Ink & Toner', 57 => 'Get Rich', 58 => 'Debt & Credit', 59 => 'Casinos & Gambling', 60 => 'Ungrouped Black Rules', 61 => 'Experimental Abstract', 62 => 'Obfuscation Techniques', 63 => 'Experimental Received [ip]', }; sub new { my ($class, $mailsa) = @_; $class = ref($class) || $class; my $self = $class->SUPER::new($mailsa); bless ($self, $class); # Name of evaluation rule. $self->register_eval_rule ("snf4sa_sacheck"); # Use localhost. $self->{SNF_Host} = "localhost"; # Use default port. $self->{SNF_Port} = 9001; # Timeout. $self->{SNF_Timeout} = 1; # Directory for files containing emails read by SNFServer. $self->{Temp_Dir} = '/tmp/snf4sa'; # Maximum email message size (including headers). $self->{SNF_MaxTempFileSize} = 64 * 1024; # Key for confidence in mail header inserted by SNFServer. $self->{GBUdb_ConfidenceKey} = "c="; # Key for probability in mail header inserted by SNFServer. $self->{GBUdb_ProbabilityKey} = "p="; # Key for GBUdb maximum weight in the configuration file. $self->{GBUdb_MaxWeightKey} = "gbudb_max_weight"; # Key for SNFServer code in configuration file. $self->{SNF_CodeKey} = "snf_result"; # Key for SA score increment in configuration file. $self->{SA_DeltaScoreKey} = "sa_score"; # Key for short circuit in configuration file. $self->{SA_ShortCircuitYesKey} = "short_circuit_yes"; # Key for no short circuit in configuration file. $self->{SA_ShortCircuitNoKey} = "short_circuit_no"; # Key for plugin score threshold in the configuration file. $self->{Plugin_score_thresholdKey} = "pre_3.2_plugin_score_threshold"; return $self; } # DEBUG/TEST. #sub extract_metadata { # # my ($self, $opts) = @_; # # print "***********************\n"; # print "extract_metadata called\n"; # print "***********************\n"; # # $opts->{msg}->put_metadata("X-Extract-Metadata:", "Test header"); # #} # END OF DEBUG/TEST. sub have_shortcircuited { my ($self, $options) = @_; if (defined($options->{permsgstatus}->{shortCircuit})) { return $options->{permsgstatus}->{shortCircuit}; } return 0; } sub parse_config { my ($self, $options) = @_; # DEBUG. #print "parse_confg. key: $options->{key}\n"; #print "parse_config. line: $options->{line}\n"; #print "parse_config. value: $options->{value}\n"; #END OF DEBUG. # Process GBUdb_max_weight. if (lc($options->{key}) eq $self->{GBUdb_MaxWeightKey}) { # GBUdb maximum weight. my $tempValue = $options->{value}; # Test that the value was a number. #$self->log_debug("Found $self->{GBUdb_MaxWeightKey} . " value: $options->{value}, tempValue: $tempValue\n"; # DEBUG. if ($tempValue =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/) { # Value was a number. Load and return success. $options->{conf}->{gbuDbMaxWeight} = $tempValue; $self->inhibit_further_callbacks(); return 1; } else { $self->log_debug("Invalid value for $self->{GBUdb_MaxWeightKey} " . $tempValue); } } elsif (lc($options->{key}) eq $self->{Plugin_score_thresholdKey}) { # Plugin score threshold. my $tempValue = $options->{value}; # Test that the value was a number. #print "Found $self->{Plugin_score_thresholdKey} value: $options->{value}\n"; # DEBUG. if ($tempValue =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/) { # Value was a number. Load and return success. $options->{conf}->{pluginScoreThreshold} = $tempValue; $self->inhibit_further_callbacks(); return 1; } else { $self->log_debug("Invalid value for $self->{Plugin_score_thresholdKey} " . $tempValue); } } elsif (lc($options->{key}) eq $self->{SNF_CodeKey}) { # Relationship between SNFServer code and SA score delta. my $snf = $self->parse_snf_sa_mapping($options); if (defined($snf)) { my @snfCode = @{$snf->{snfCode}}; #print "snf->{snfCode}: @snfCode\n"; # DEBUG. #print "snf->{deltaScore}: $snf->{deltaScore}\n"; # DEBUG. #print "snf->{shortCircuit}: $snf->{shortCircuit}\n"; # DEBUG. # Save configuration. foreach my $i (@{$snf->{snfCode}}) { # Create (or update) an element in the mapping array # that snfSaMapping is a reference to. $options->{conf}->{snfSaMapping}->[$i] = { deltaScore => $snf->{deltaScore}, shortCircuit => $snf->{shortCircuit} }; } # DEBUG. #for (my $i = 0; $i < @{$options->{conf}->{snfSaMapping}}; $i++) { # if (! defined($options->{conf}->{snfSaMapping}->[$i])) { # print "No configuration for SNFServer code $i\n"; # next; # } # print "SNFServer code: $i, " . # "deltaScore: " . # "$options->{conf}->{snfSaMapping}->[$i]->{deltaScore}, " . # "shortCircuit: " . # "$options->{conf}->{snfSaMapping}->[$i]->{shortCircuit}\n"; #} # END OF DEBUG. # Successfully parsed. $self->inhibit_further_callbacks(); return 1; } } # Wasn't handled. return 0; } # Parse a snf_result configuration line. # # Input-- # # $line--String containing the snf_result line without the first word. # # Returns a reference with the following fields (if no error)-- # # snfCode--Array of SNFServer result codes that this configuration # line specifies. # # deltaScore--SA score increment for the codes in @snfCode. # # shortCircuit--True if a SNFServer code in @snfCode is to # short-circuit the message scan, false otherwise. # # If the line cannot be parsed, the return value is undef. # sub parse_snf_sa_mapping { my ($self, $options) = @_; my $value = $options->{value}; my $ret_hash = { snfCode => undef, deltaScore => undef, shortCircuit => undef }; # SNFServer codes found. my @snfCode = (); # Remove leading and trailing whitespace. $value =~ s/^\s+//; $value =~ s/\s+$//; # Convert to lower case. $value = lc($value); # Split up by white space. my @specVal = split(/\s+/, $value); if (0 == @specVal) { # No separate words. $self->log_debug("No separate words found in configuration line '" . $options->{line} . "'"); return undef; } # Convert each SNFServer result specification into an integer. my $lastSpec; for ($lastSpec = 0; $lastSpec < @specVal; $lastSpec++) { # Check for next keyword. if ($specVal[$lastSpec] eq $self->{SA_DeltaScoreKey}) { # We've completed the processing of the SNFServer result # codes. last; } # Get the code values. my @codeVal = $self->get_code_values($specVal[$lastSpec]); if (0 == @codeVal) { # No code values were obtained. $self->log_debug("Couldn't parse all the SNFServer code values " . "in configuration line '" . $options->{line} . "'"); return undef; } # Add to the list of codes. @snfCode = (@snfCode, @codeVal); } # Sort the SNFServer result codes and remove duplicates. @snfCode = sort { $a <=> $b } @snfCode; my $prev = -1; my @temp = grep($_ != $prev && (($prev) = $_), @snfCode); $ret_hash->{snfCode} = \@temp; # The $specVal[$lastSpec] is $self->{SA_DeltaScoreKey}. Return if # there aren't enough parameters. $lastSpec++; if ($lastSpec >= @specVal) { # Not enough parameters. $self->log_debug("Not enough parameters in configuration line '" . $options->{line} . "'"); return undef; } # Extract the SA delta score. $ret_hash->{deltaScore} = $specVal[$lastSpec]; if (!($ret_hash->{deltaScore} =~ /^([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?$/)) { # SA delta score isn't a number. $self->log_debug("Value after '" . $self->{SA_DeltaScoreKey} . "' ($specVal[$lastSpec]) must be a number " . "in configuration line '" . $options->{line} . "'"); return undef; } # Get short circuit spec. $lastSpec++; $ret_hash->{shortCircuit} = 0; if ( ($lastSpec + 1) == @specVal) { # A parameter was specified. my $shortCircuitSpec = $specVal[$lastSpec]; if ($self->{SA_ShortCircuitYesKey} eq $shortCircuitSpec) { # Specified short-circuit evaluation. $ret_hash->{shortCircuit} = 1; } elsif ($self->{SA_ShortCircuitNoKey} ne $shortCircuitSpec) { # Invalid short-circuit specification. $self->log_debug("Invalid short-circuit specification: '" . $specVal[$lastSpec] . "' in configuration line '" . $options->{line} . "'. Must be '$self->{SA_ShortCircuitYesKey}' " . " or '$self->{SA_ShortCircuitNoKey}'."); return undef; } } elsif ($lastSpec != @specVal) { # Too many parameters were specified. $self->log_debug("Too many parameters were specified in " . "configuration line '" . $options->{line} . "'"); return undef; } return $ret_hash; } sub get_code_values { my ($self, $specElement) = @_; my @snfCode = (); # Split the specification. my @codeVal = split(/-/, $specElement); if (1 == @codeVal) { if ($specElement =~ /^\d+$/) { # Found a single code. $snfCode[0] = 1 * $specElement; } } elsif (2 == @codeVal) { # Check range. if ( ($codeVal[0] =~ /^\d+$/) && ($codeVal[1] =~ /^\d+$/) ) { # Found a range of codes. $codeVal[0] = 1 * $codeVal[0]; $codeVal[1] = 1 * $codeVal[1]; if ($codeVal[0] <= $codeVal[1]) { # Add these SNF codes. for (my $i = $codeVal[0]; $i <= $codeVal[1]; $i++) { push(@snfCode, $i); } } } } return @snfCode; } # Output a debug message. # # Input-- # # $message--String containing the message to output. # sub log_debug { my ($self, $message) = @_; dbg("snf4sa: $message"); } # Check the message with SNFServer. sub snf4sa_sacheck { my ($self, $permsgstatus, $fulltext) = @_; my $response =''; my $exitvalue; # Make sure we have a temp dir unless(-d $self->{Temp_Dir}) { mkdir($self->{Temp_Dir}); chmod(0777, $self->{Temp_Dir}); }; # Truncate the message. my $mailtext = substr( ${$fulltext}, 0, $self->{SNF_MaxTempFileSize}); # create our temp file, $filename will contain the full path my ($fh, $filename) = tempfile( DIR => $self->{Temp_Dir} ); # spew our mail into the temp file my $SNF_fh = IO::File->new( $filename, "w" ) || die(__PACKAGE__ . ": Unable to create temporary file '" . $filename . "'"); $SNF_fh->print($mailtext) || $self->cleanup_die($filename, __PACKAGE__ . ": Unable to write to temporary file '" . $filename . "'"); $SNF_fh->close || $self->cleanup_die($filename, __PACKAGE__ . ": Unable to close temporary file '" . $filename . "'"); # Change permissions. my $cnt = chmod(0666, $filename) || $self->cleanup_die($filename, __PACKAGE__ . ": Unable to change permissions of temporary file '" . $filename . "'"); # xci_scan connects to SNFServer with XCI to scan the message my $SNF_XCI_Return = $self->xci_scan( $filename ); #print "header:\n\n$SNF_XCI_Return->{header}\n\n"; # DEBUG # Remove the temp file, we are done with it. unlink($filename); # Check response from SNFServer. if (! $SNF_XCI_Return ) { die(__PACKAGE__ . ": Internal error"); } # Check for success. if (! $SNF_XCI_Return->{"success"}) { die(__PACKAGE__ . ": Error from SNFServer: " . $SNF_XCI_Return->{"message"}); } # get the return code and translation my ( $rc, $rcx ) = ( $SNF_XCI_Return->{"code"}, $rule_code_xlat->{ $SNF_XCI_Return->{"code"} } ); $rc = -1 unless defined $rc; # default values $rcx = 'Unknown' unless $rcx; my $rch = $SNF_XCI_Return->{"header"}; # the SNF header(s) # DEBUG. # for (my $i = 0; $i < @{$permsgstatus->{main}->{conf}->{snfSaMapping}}; $i++) { # if (! defined($permsgstatus->{main}->{conf}->{snfSaMapping}->[$i])) { # print "No configuration for SNFServer code $i\n"; # next; # } # print "SNFServer code: $i, " . # "deltaScore: " . # "$permsgstatus->{main}->{conf}->{snfSaMapping}->[$i]->{deltaScore}, " . # "shortCircuit: " . # "$permsgstatus->{main}->{conf}->{snfSaMapping}->[$i]->{shortCircuit}\n"; # } # END OF DEBUG. # Initialize the change in the SA score. my $deltaScore = 0.0; # Add the score from the SNFServer return. if (defined($permsgstatus->{main}->{conf}->{snfSaMapping}->[$rc])) { $deltaScore += $permsgstatus->{main}->{conf}->{snfSaMapping}->[$rc]->{deltaScore}; $permsgstatus->{shortCircuit} = $permsgstatus->{main}->{conf}->{snfSaMapping}->[$rc]->{shortCircuit}; } # Perform GBUdb processing. if (defined($permsgstatus->{main}->{conf}->{gbuDbMaxWeight})) { #print "gbudbMaxWeight: $permsgstatus->{main}->{conf}->{gbuDbMaxWeight}\n\n"; # DEBUG. # Calculate the contribution to the scrore from the GBUdb results. $deltaScore += $self->calc_GBUdb($SNF_XCI_Return->{header}, $permsgstatus->{main}->{conf}->{gbuDbMaxWeight}); } # Add the headers. $permsgstatus->set_tag("SNFRESULTTAG", "$rc ($rcx)"); $permsgstatus->set_tag("SNFMESSAGESNIFFERSCANRESULT", $self->extract_header_body($SNF_XCI_Return->{header}, "X-MessageSniffer-Scan-Result")); $permsgstatus->set_tag("SNFMESSAGESNIFFERRULES", $self->extract_header_body($SNF_XCI_Return->{header}, "X-MessageSniffer-Rules")); $permsgstatus->set_tag("SNFGBUDBANALYSIS", $self->extract_header_body($SNF_XCI_Return->{header}, "X-GBUdb-Analysis")); # If dynamic scoring isn't supported, this rule fires if the score is # above the threshold. if (Mail::SpamAssassin::Version() le "3.2.0") { my $pluginScoreThreshold = 2.5; # Default value. if (defined($permsgstatus->{main}->{conf}->{pluginScoreThreshold})) { $pluginScoreThreshold = $permsgstatus->{main}->{conf}->{pluginScoreThreshold}; } #print "Before comparison. pluginScoreThreshold: $pluginScoreThreshold deltaScore: $deltaScore\n"; # DEBUG if ($deltaScore < $pluginScoreThreshold) { $deltaScore = 0.0; } } # print "After comparison. deltaScore: $deltaScore\n"; # DEBUG # Submit the score. if ($deltaScore) { $permsgstatus->got_hit("SNF4SA", "", score => $deltaScore); for my $set (0..3) { $permsgstatus->{conf}->{scoreset}->[$set]->{"SNF4SA"} = sprintf("%0.3f", $deltaScore); } } # Always return zero, since the score was submitted via got_hit() # above. return 0; } # Calculate the contribution of the GBUdb scan to the SA score. # # Input-- # # $headers--String containing the headers. # # $weight--Weight used to calculate the contribution. # # Returns the contribution to the SA score (float). # sub calc_GBUdb { my ( $self, $headers, $weight ) = @_; # Split the header into lines. my @headerLine = split(/\n/, $headers); # Find the line containing the GBUdb results. my $line; foreach $line (@headerLine) { # Search for the tag. if ($line =~ /^X-GBUdb-Analysis:/) { # GBUdb analysis was done. Extract the values. my $ind0 = index($line, $self->{GBUdb_ConfidenceKey}); my $ind1 = index($line, " ", $ind0 + 2); if (-1 == $ind0) { return 0.0; } my $c = 1.0 * substr($line, $ind0 + 2, $ind1 - $ind0 - 2); #print "calc_GBUdb. line: $line\n"; # DEBUG #print "calc_GBUdb. c: $c, ind0: $ind0, ind1: $ind1\n"; # DEBUG $ind0 = index($line, $self->{GBUdb_ProbabilityKey}); $ind1 = index($line, " ", $ind0 + 2); if (-1 == $ind0) { return 0.0; } my $p = 1.0 * substr($line, $ind0 + 2, $ind1 - $ind0 - 2); #print "calc_GBUdb. p: $p, ind0: $ind0, ind1: $ind1\n"; # DEBUG # Calculate and return the score. my $score = abs($p * $c) ** 0.5; $score *= $weight; if ($p < 0.0) { $score *= -1.0; } # DEBUG. #print "calc_GBUdb. p: $p, c: $c, weight: $weight\n"; #print "calc_GBUdb. score: $score\n"; # END OF DEBUG. return $score; } } } # Extract the specified header body from a string containing all the # headers. # # Input-- # # $headers--String containing the headers. # # $head--String containing the head of the header to extract. # # Returns the body of the header. # sub extract_header_body { my ( $self, $headers, $head ) = @_; my $body = ""; if ($headers =~ /$head:(.*)/s) { my $temp = $1; $temp =~ /(.*)\nX-(.*)/s; $body = $1; } return $body; } # xci_scan( $file ) # returns hashref: # success : true/false # code : response code from SNF # message : scalar message (if any) sub xci_scan { my ( $self, $file ) = @_; return undef unless $self and $file; my $ret_hash = { success => undef, code => undef, message => undef, header => undef, xml => undef }; my $xci = $self->connect_socket( $self->{SNF_Host}, $self->{SNF_Port} ) or return $self->err_hash("cannot connect to socket ($!)"); $xci->print("\n"); my $rc = $ret_hash->{xml} = $self->socket_response($xci, $file); $xci->close; if ( $rc =~ /^/ ) { $ret_hash->{success} = 1; $ret_hash->{code} = $1; $rc =~ /(.*)<\/xhdr>/s and $ret_hash->{header} = $1; } elsif ( $rc =~ /^{message} = $1; } else { $ret_hash->{message} = "unknown XCI response: $rc"; } return $ret_hash; } # connect_socket( $host, $port ) # returns IO::Socket handle sub connect_socket { my ( $self, $host, $port ) = @_; return undef unless $self and $host and $port; my $protoname = 'tcp'; # Proto should default to tcp but it's not expensive to specify $self->{XCI_Socket} = IO::Socket::INET->new( PeerAddr => $host, PeerPort => $port, Proto => $protoname, Timeout => $self->{SNF_Timeout} ) or return undef; $self->{XCI_Socket}->autoflush(1); # make sure autoflush is on -- legacy return $self->{XCI_Socket}; # return the socket handle } # socket_response( $socket_handle ) # returns scalar string sub socket_response { my ( $self, $rs, $file ) = @_; my $buf = ''; # buffer for response # blocking timeout for servers who accept but don't answer eval { local $SIG{ALRM} = sub { die "timeout\n" }; # set up the interrupt alarm $self->{SNF_Timeout}; # set up the alarm while (<$rs>) { # read the socket $buf .= $_; } alarm 0; # reset the alarm }; # report a blocking timeout if ( $@ eq "timeout\n" ) { $self->cleanup_die($file, __PACKAGE__ . ": Timeout waiting for response from SNFServer"); } elsif ( $@ =~ /alarm.*unimplemented/ ) { # no signals on Win32 while (<$rs>) { # get whatever's left # in the socket. $buf .= $_; } } return $buf; } # return an error message for xci_scan sub err_hash { my ( $self, $message ) = @_; return { success => undef, code => undef, message => $message }; } sub cleanup_die { my ( $self, $file, $message ) = @_; unlink($file); die($message); } 1;