aggregate – report event counts from a stream

Another shell utility. It counts how often each distinct input line occurs, which is useful for, e.g., tallying HTTP status codes (404, 500, 200, 302) extracted from a log file.

#!/usr/bin/perl
# aggregate: count how often each distinct input line occurs and print a
# summary every N lines ('line' mode) or after more than N seconds have
# passed ('time' mode).  Input comes from the files named on the command
# line, or from STDIN when none are given.
$|++;    # autoflush STDOUT so each summary is written as soon as it is printed
use strict;
use warnings;
use Getopt::Long;

my $mode = 'line';       # aggregation trigger: 'line' or 'time'
my $tick = 100;          # interval size: lines or seconds, depending on $mode
my $help = undef;
my $keysfile = undef;    # optional file listing the only values to count
my %keys = ();           # one entry (value 0) per line of the keys file

# The keys option takes a file name, so it must be declared as a string
# ('=s'); '=f' would make Getopt::Long demand a real number.
my $options_ok = GetOptions(
  'mode|m=s' => \$mode,
  'tick|t=i' => \$tick,
  'help|h'   => \$help,
  'keys|k=s' => \$keysfile,
);

# Print the usage text and stop on -h or on any invalid / unparsable option.
if ( !$options_ok || $help || ( $mode ne 'line' && $mode ne 'time' ) || $tick <= 0 || ( defined($keysfile) && !-f $keysfile ) ) {
  my $USAGE = join '', <DATA>;
  print STDERR $USAGE;
  exit(1);
}

if ( defined $keysfile ) {
  open( my $keys_fh, '<', $keysfile ) or die "Couldn't open keys file '$keysfile': $!";
  while ( my $line = <$keys_fh> ) {
    chomp $line;
    # Store 0, not a running tally: %keys doubles as the initial state of
    # %count, so every listed key must start each interval at zero.
    $keys{ $line } = 0;
  }
  close($keys_fh);
}

my %count = %keys;    # per-interval counters, pre-seeded with the listed keys
my $offset = 0;       # lines read so far ('line') or time of last flush ('time')
my $mark = 0;         # lines since last flush ('line') or time of last flush ('time')

if ( $mode eq 'time' ) {
  $mark = time();
}

while ( my $element = <> ) {
  chomp $element;
  if ( %keys ) {
    # A keys file was given: count only lines that exactly match a key.
    # Membership is tested with exists because the stored values are 0.
    $count{ $element }++ if exists $keys{ $element };
  }
  else {
    $count{ $element }++;
  }

  if ( $mode eq 'line' ) {
    $offset++;
    $mark++;
    if ( $mark >= $tick ) {
      $mark = 0;
      flush();
    }
  }
  elsif ( $mode eq 'time' ) {
    # The clock is only consulted when a line arrives, so an idle input
    # stream delays the summary until the next line is read.
    my $now = time();
    if ( $mark + $tick < $now ) {
      $offset = $now;
      $mark = $now;
      flush();
    }
  }
}
# Report whatever accumulated since the last summary.
flush();
 
# Print the summary for the interval that just ended, then start a new one.
#
# Output goes to STDOUT: a "summary/<tick> @ <offset>" header line followed
# by one tab-indented "<count>\t<value>" row per entry in %count, ordered by
# the default string sort of the values.  Afterwards %count is reset to a
# copy of %keys.
sub flush {
  my @rows = map { "\t$count{ $_ }\t$_\n" } sort keys %count;
  print "summary/$tick @ $offset\n", @rows;
  %count = %keys;
}
 
__DATA__
Usage: aggregate [-h] [-m <time|line>] [-t <# of seconds or lines>] [-k <keys file>]
 
Read lines from STDIN.  Print lines by frequency per input lines or time.
 
  -h    show help (this message)
  -m    mode.  one of 'time' or 'line'.  defaults to 'line'.
  -t    aggregation size.  an integer.  value is # of lines ('line' mode) or # of
        seconds ('time' mode) after which an aggregation is triggered.  defaults to 100.
  -k    keys file.  a text file of strings to *exactly* match in the input, one per line.
        if a keys file is provided, lines not present in the keys file will be silently
        ignored.