#-----------------------------------------------------------------
# Function: parseLogLine
# Purpose: Read a line from an apache web server log (Common or
# Combined Log Format) and split it into components
# Author: Tony Byorick
# I/O: $record - one line of a web access log file
# $retHash - a reference to a hash which returns values
# parsed by this subroutine. If omitted, a new hash is
# created.
# Returns: the hash reference that was filled in.
#
# Keys written (only when the field is found on the line; keys for
# fields that are not found are left untouched, so a caller that
# reuses one hash across lines should clear it first):
#   host     - client address: first IPv4 address before the date,
#              otherwise the first token (hostname / IPv6)
#   date     - text between the first pair of square brackets
#   method   - HTTP method from the quoted request line
#   url      - request target, e.g. "/" or "/index.html"
#   protocol - e.g. "HTTP/1.1" (absent for HTTP/0.9 style requests)
#   status   - HTTP status code
#   bytes    - response size; a logged "-" (no body) is stored as 0
#   referer  - referer without the surrounding double quotes
#   client   - user agent without the surrounding double quotes
#-----------------------------------------------------------------
sub parseLogLine
{
    my $record  = shift;    # A single line from the access log
    my $retHash = shift;    # reference to output hash

    $retHash = {} unless defined $retHash;
    return $retHash unless defined $record;

    my $all = $record;      # unparsed remainder of the line

    # A double-quoted field; Apache writes embedded quotes as \" so
    # backslash escapes are skipped over rather than ending the field.
    my $quoted = qr/"((?:[^"\\]|\\.)*)"/;

    # Host: only look in front of the date bracket so that an IP address
    # appearing later (URL, referer) is never mistaken for the client.
    my $bracket = index($all, '[');
    my $prefix  = $bracket >= 0 ? substr($all, 0, $bracket) : $all;
    if (   $prefix =~ /(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/
        || ( $bracket > 0 && $prefix =~ /^\s*(\S+)/ ) )
    {
        $retHash->{'host'} = $1;
        # $+[0] is the end offset of the last match; $prefix is a leading
        # substring of $all, so the offset is valid for $all as well.
        $all = substr($all, $+[0]);
    }

    # Date/time/tz: stop at the FIRST closing bracket, not the last one
    # on the line (user agents such as "Mozilla/4.08 [en]" contain one).
    if ( $all =~ /\[([^\]]+)\]/ ) {
        $retHash->{'date'} = $1;
        $all = substr($all, $+[0]);
    }

    # Request line: the first quoted field, split into its three parts.
    if ( $all =~ $quoted ) {
        my $request = $1;
        $all = substr($all, $+[0]);
        if ( $request =~ /^\s*([A-Z][A-Z-]*)\s+(\S.*?)(?:\s+(HTTP\/\S+))?\s*$/ ) {
            $retHash->{'method'}   = $1;
            $retHash->{'url'}      = $2;
            $retHash->{'protocol'} = $3 if defined $3;
        }
    }

    # Status and size: whole numeric tokens located before the next
    # quoted field, so digits inside the referer or user agent are
    # never picked up.
    if ( $all =~ /^[^"]*?(?<!\S)(\d+)(?!\S)/ ) {
        $retHash->{'status'} = $1;
        $all = substr($all, $+[0]);

        if ( $all =~ /^[^"]*?(?<!\S)(\d+|-)(?!\S)/ ) {
            # Apache logs "-" instead of 0 when no body was sent.
            $retHash->{'bytes'} = $1 eq '-' ? 0 : $1;
            $all = substr($all, $+[0]);
        }
    }

    # Remaining quoted fields: referer, then user agent.
    my @trailing = $all =~ /$quoted/g;
    if ( @trailing == 1 && $trailing[0] =~ /^Mozilla/i ) {
        # A single quoted field that looks like a browser user agent:
        # treat it as the client (log format with agent but no referer).
        $retHash->{'client'} = $trailing[0];
    }
    else {
        $retHash->{'referer'} = $trailing[0] if @trailing > 0;
        $retHash->{'client'}  = $trailing[1] if @trailing > 1;
    }

    return $retHash;
}