#!/usr/bin/perl
#
# Convert seek-phrases-in-log output to something suitable for use as a production ruleset

use strict;
use warnings;

# First command-line argument: the prefix used to name the generated meta
# rules (each one is named "<prefix><N>").  Any remaining arguments are
# treated as input files by <>; with none, input is read from STDIN.
my $nameprefix = shift @ARGV;

# Without a prefix the generated rules would be named by number alone,
# so refuse to run rather than emit a ruleset with unusable names.
die "usage: $0 nameprefix [file ...]\n"
    unless (defined($nameprefix) && length($nameprefix));

my $num = 0;      # number of blocks closed so far; used as the rule-name suffix
my @rule = ();    # names of the body rules collected for the current block

while (<>) {
  # fix control chars, high-bit chars
  # (everything except newline is rewritten as a literal \x{NN} escape)
  s/([\x00-\x09\x0b-\x1f\x7f-\xff])/
	sprintf "\\x{%02x}", ord $1;
  /gex;

  # Pass every (escaped) input line through to the output unchanged.
  print;

  # Remember the name of each body rule so the block's meta rule can OR
  # them together.
  if (/^body\s+(\S+)\s/) {
    push @rule, $1;
  }

  # A threshold marker closes the block collected so far (emitting its
  # meta rule) and starts collecting a new one.
  if (/^# passed hit-rate threshold: (\S+)/) {
    end_subrule_block();
    start_subrule_block($1);
  }
}

# Flush the final block, which has no trailing threshold marker.
end_subrule_block();
exit;

# Begin a new block: forget the body-rule names collected so far.
# The caller passes the hit-rate threshold taken from the marker line,
# but it is not used here.
sub start_subrule_block {
  my ($threshold) = @_;    # accepted but intentionally unused
  return (@rule = ());
}

# Close the current block and print its meta rule.
#
# Bumps the block counter, then prints a "meta"/"score"/"describe" triple
# named "<prefix><counter>".  When body rules were collected, the meta
# expression ORs them together and scores 3.0; when none were collected,
# a placeholder "(0)" rule with score 0 is printed instead, so the counter
# still advances for empty blocks.
sub end_subrule_block {
  $num += 1;
  my $name = $nameprefix . $num;

  # Choose the meta expression and score according to whether any body
  # rules were seen in this block.
  my ($expr, $score) = @rule
      ? (join(" || ", @rule), "3.0")
      : ("0", "0");

  # Build the whole stanza as one string so it is written by a single print.
  print "\n"
      . "meta $name   ($expr)\n"
      . "score $name  $score\n"
      . "describe $name  Body contains frequently-spammed text patterns\n";
}
