#!c:\perl\bin\perl.exe
use strict;
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );
#use lib "lib";
use QueryLogger;
use WebLogger;
use AccessLogger;
use CGI;
# In ActiveState Perl this is the Time-Modules package
# ppm install "time-modules"
use Time::ParseDate;
# In ActiveState Perl this is the Net-IP package
# ppm install "Net-IP"
use Net::IP;
#perl2exe_include QueryLogger
#perl2exe_include WebLogger
#perl2exe_include AccessLogger
#perl2exe_include Archive::Zip
#perl2exe_include strict
#perl2exe_include Time::ParseDate
#perl2exe_include Net::IP

# Diagnostic verbosity: the subs below emit extra prints when this exceeds 2 (or 3).
my $debug = 0;

# Lookup cache owned by checkIPRanges (address => mapped name).
my %knownRanges;

# Two reusable Net::IP objects; checkIPRanges re-targets them with set() on
# every lookup ($ipr holds a configured range, $ipv the visitor address).
my $ipr = Net::IP->new('127.0.0.1');
my $ipv = Net::IP->new('127.0.0.1');

#print "content-type:text/html\n\n";

# All settings (request/command-line parameters plus properties files) are
# read through this CGI object.
my $query = CGI->new;

# Called without any parameter: print the list of supported name=value
# settings and stop.
#
# The help text is a single-quoted here-document so that the Windows paths in
# it (c:\logs\ and c:\inetpub\...) are printed literally. Inside a
# double-quoted string Perl reads "\l" as "lowercase the next character" and
# drops the backslash of "\i" and "\.", which garbled those two examples.
if (scalar($query->param()) == 0) {
    print "\n\n", <<'END_USAGE';
MODES:
	- usertype=inst || usertype=user
	- mingroupfilter=
	- minresultfilter=
	- minpopfilter=
	- minfilter=
    - minaccessfilter=
    - minexportinstfilter=
    - minexportfilefilter=
    - minwebinstfilter=
    - minwebfilefilter=
	- cid=10000
	- vcid=NA
	- iid=VRA
	- process=all
	- headerfile=filename.html
	- footerfile=filename.html
	- fileprefix=VRA
	- logdir=c:\logs\
	- overideWebRegex=(/luna/|client=luna)

    NOTE: the trailing slash is required on a directory

	- logsuffix=txt,xml,log
	- map=131.229.219.0-131.229.219.255:TEST
	- fromdate=01/01/2000
	- todate=01/01/2002
	- outdir=c:\inetpub\...
	- propsfile=file.props
	dates may also include 'today','yesterday',
	   'last week','last month','last year'
END_USAGE
    exit;
}

# Added to allow for multiple properties files: every propsfile= parameter
# names a text file of key=value lines that are merged into $query.
#   - lines starting with '#' are comments; empty lines are skipped
#   - a key that is not set yet is created, otherwise the value is appended
#     to the values the key already has
# Dies when a properties file cannot be opened.
my @propsfiles = $query->param('propsfile');

for my $propsfile (@propsfiles) {
    print STDERR " - loading properties file $propsfile\n";
    open(my $props_fh, '<', $propsfile)
        or die "cannot open Properties file $propsfile: $!";

    while (my $entry = <$props_fh>) {
        next if $entry =~ /^\#/;
        $entry =~ s/[\r\n]//g;
        next if $entry eq "";

        # Split on the FIRST '=' only: values such as
        # overideWebRegex=(/luna/|client=luna) contain '=' themselves.
        my ($key, $val) = split(/=/, $entry, 2);
        next if !defined $key || $key eq "";
        $val = "" if !defined $val;

        if (!defined $query->param($key)) {
            $query->param(-name => $key, -value => $val);
        } else {
            $query->append(-name => $key, -value => $val);
        }
    }
    close($props_fh);
}

# Mandatory identifiers; the run aborts when cid or iid is empty.
# vcid is optional and falls back to 'NA'.
my $cid = $query->param('cid');
if ($cid eq "") {
    die "Collection ID (cid) parameter is Required";
}
my $vcid = $query->param('vcid') || 'NA';
my $iid = $query->param('iid');
if ($iid eq "") {
    die "Institution ID (iid) parameter is Required";
}

#for($query->param()) {print $_."\t";print join("\t",$query->param($_));print "\r\n";};

# Names for which a report is written; filled by the main loop.
my @pr;

# Reports go below <outdir>output/ (outdir is expected to end in a slash).
my $prefix = $query->param("outdir") . "output/";

# Reporting window as typed by the user, plus the optional web-log regex.
my ($datestart, $dateend, $override)
    = map { scalar $query->param($_) } qw(fromdate todate overideWebRegex);
print STDERR " - analyzing from $datestart to $dateend\n";


my $fileprefix = $query->param("fileprefix");
my $ftype = $query->param("usertype") || "institution";

# A single user may be selected with userfilter=; blanks become underscores.
my $fval = $query->param("userfilter");
$fval =~ s/ /_/ig;

# Minimum-count thresholds handed to the loggers' getQueries calls, and the
# process= selector.
my (
    $afilter,  $gfilter,  $pfilter,  $rfilter,
    $xifilter, $xffilter, $wifilter, $wffilter,
    $process,
) = map { scalar $query->param($_) } qw(
    minaccessfilter     mingroupfilter      minpopfilter     minresultfilter
    minexportinstfilter minexportfilefilter minwebinstfilter minwebfilefilter
    process
);

# Echo the effective settings on STDERR.
print STDERR
    " - user is $ftype\n",
    " - processing these users: $process\n",
    " - filtering user access list to more than: $afilter\n",
    " - filtering group access list to more than: $gfilter\n",
    " - filtering query popularity list to more than: $pfilter\n",
    " - filtering query results list to more than: $rfilter\n",
    " - filtering export by institution list list to more than: $xifilter\n",
    " - filtering export by file list to more than: $xffilter\n",
    " - filtering image request by institution to more than: $wifilter\n",
    " - filtering image request by file to more than: $wffilter\n";
if ($override ne "") {
    print STDERR " - overriding web parameter matching with $override\n";
}

# Replace both window boundaries by what parsedate makes of them; a non-empty
# second return value is treated as an error message and aborts the run.
my $err;
for my $bound (\$datestart, \$dateend) {
    (${$bound}, $err) = parsedate(${$bound});
    die "invalid date $err" if $err ne "";
}

# Render both window boundaries as YYYYMMDD (local time); the two stamps
# become part of every report file name.
my @lt;
my ($day, $mon, $year);
my @stamps;
for my $epoch ($datestart, $dateend) {
    @lt   = localtime($epoch);
    $day  = sprintf("%02d", $lt[3]);
    $mon  = sprintf("%02d", 1 + $lt[4]);    # localtime months start at 0
    $year = 1900 + $lt[5];                  # localtime years count from 1900
    push @stamps, "$year$mon$day";
}
my ($startout, $endout) = @stamps;

# Log files to analyse: every logdir= value is scanned for files whose name
# ends in one of the logsuffix= values (see getLogDirs).
my @logs = @{ getLogDirs(join("\t", $query->param("logdir")), join(",", $query->param("logsuffix"))) };
#print @logs;

# headerfile= / footerfile= name optional HTML fragments that are copied into
# the reports. When the value names an existing file, the variable is replaced
# by that file's content, unchanged; if the file cannot be opened a warning is
# issued and the fragment becomes empty.
# NOTE(review): a value that does not name an existing file is left as it is
# and therefore ends up verbatim in the report (original behaviour, kept) -
# confirm whether that is wanted.
my $headerfile = $query->param('headerfile');
my $footerfile = $query->param('footerfile');
for my $fragment (\$headerfile, \$footerfile) {
    my $path = ${$fragment};
    next if !defined $path || !-e $path;

    if (open(my $fragment_fh, '<', $path)) {
        # join with '' so that no separator is inserted between the lines
        ${$fragment} = join('', <$fragment_fh>);
        close($fragment_fh);
    } else {
        warn "cannot open $path: $!\n";
        ${$fragment} = "";
    }
}
# One collector per report section. processFile fills them and the main loop
# replaces them with fresh instances at the start of every pass.
my @logger_args = ($ftype, $cid, $vcid, $iid);
my $qlog   = QueryLogger->new(@logger_args);
my $alog   = AccessLogger->new(@logger_args);
my $weblog = WebLogger->new(@logger_args, "Image Access", $override);
my $elog   = WebLogger->new(@logger_args, "File Export");

print STDERR "parsing logfiles\n";

# Main driver: one pass over all log files per report section.
#
# Every pass
#   1. starts with fresh loggers,
#   2. feeds each log file (or each member of a *zip archive) to processFile,
#   3. writes its section into one HTML file per entry of @pr.
# The Access pass decides which names go into @pr, creates (truncates) the
# report files and writes the page head; the later passes append to the same
# files. The Export pass is the last one, so it also writes the footer and the
# closing </HTML> tag (previously the closing tag was appended in every pass,
# leaving three stray </HTML> tags in the middle of each report).
#
# NOTE(review): $headerfile is written in every pass, i.e. four times per
# report - kept as found; confirm whether it is meant as a section separator.
for my $mode (qw(Access Query Image Export)) {
    $qlog   = QueryLogger->new($ftype, $cid, $vcid, $iid);
    $alog   = AccessLogger->new($ftype, $cid, $vcid, $iid);
    $weblog = WebLogger->new($ftype, $cid, $vcid, $iid, "Image Access", $override);
    $elog   = WebLogger->new($ftype, $cid, $vcid, $iid, "File Export");

    print "running $mode reports\n";
    for my $file (@logs) {
        print STDERR "  - $file\n";
        #print "processing $file..\n";
        if ($file !~ /zip$/) {
            processFile($file, $mode);
            next;
        }

        # zip archive: hand the content of every member to processFile
        my $zip = Archive::Zip->new();
        if ($zip->read($file) != AZ_OK) {
            warn "read error in zip archive $file\n";
            next;
        }
        for my $name ($zip->memberNames()) {
            my $member = $zip->memberNamed($name);
            print STDERR "  - $file >> $name\n";
            processFile("__ZIP", $mode, $member->contents());
        }
    }

    print STDERR "\n\ncreating reports:\n";

    # The list of reports is fixed during the Access pass and reused by the
    # following passes. "" stands for the overall report.
    if ($mode =~ /access/i) {
        if ($process =~ /all/) {
            @pr = ("");
            push @pr, keys %{ $alog->getInst() };
        }
        if ($process eq "") { @pr = (""); }
        if ($fval ne "")    { @pr = ("$fval"); }
    }

    my %seen = ();
    $prefix =~ s/[\r\n]//ig;

    mkdir("$prefix") if (!-e $prefix);
    for my $fil (@pr) {
        next if defined $seen{$fil};
        $seen{$fil} = 1;

        # named reports live in their own sub-directory, the overall report
        # directly below $prefix
        mkdir($prefix . $fil) if (!-e $prefix . $fil && $fil ne "");
        my $fn = "$prefix$fil\/$fileprefix$startout-$endout\.html";
        $fn = "$prefix$fileprefix$startout-$endout\.html" if $fil eq "";
        $fn =~ s/[\r\n]//ig;

        print STDERR "  - $fn\n";

        my $first_pass = ($mode =~ /access/i) ? 1 : 0;
        my $report;
        if (!open($report, ($first_pass ? '>' : '>>'), $fn)) {
            warn "ERROR: cannot open $fn: $!\n";
            next;    # nothing can be written for this report in this pass
        }

        if ($first_pass) {
            #_________________________________________________
            print {$report} "<HTML><head><meta http-equiv=\"content-type\" content=\"text/html; charset=UTF-8\"></head>";
            print {$report} "Running Log Analysis from <B>".$query->param("fromdate")."</B> to <B>".$query->param("todate")."</B><BR>";
            print {$report} "for user $fil<BR>" if $fil ne "";
            print {$report} $alog->getQueries($fil, $afilter);
        }

        print {$report} $headerfile;
        #  print FILE "filtering values under $filter<BR>" if defined $filter;
        print {$report} $qlog->getQueries($fil, $gfilter, $rfilter, $pfilter) if $mode =~ /query/i;
        print {$report} $weblog->getQueries($fil, $wifilter, $wffilter) if $mode =~ /image/i;
        #print "\n--",$fil,"--",$cid,"--",$filter,"--\n";

        if ($mode =~ /export/i) {
            print {$report} $elog->getQueries($fil, $xifilter, $xffilter);
            print {$report} $footerfile;
            print {$report} "</HTML>";
        }
        close($report) or warn "ERROR: cannot close $fn: $!\n";
    }
}
exit;

# processFile($file, $mode, $content)
#
# Feeds the lines of one log to the logger that belongs to the current pass.
#
#   $file    - path of the log file. The placeholder "__ZIP" means there is
#              no file on disk and the log text arrives in $content.
#   $mode    - name of the current pass; matched case-insensitively against
#              query / access / export / image
#   $content - optional log text (zip member), split on CR and LF
#
# Two kinds of lines are understood:
#   - lines containing a TAB: application log records. Field 0 is the date,
#     field 15 marks debug records (skipped), fields 4/5 carry the address,
#     fields 1, 14 and 16 decide which logger receives the record.
#   - anything else: web-server access lines (IIS or Apache layout), only
#     looked at during the image pass and only for successful requests whose
#     URL contains "user".
# Lines outside the reporting window (see datecompare) are ignored.
# The return value is not meaningful.
sub processFile {
    my ($file, $mode, $content) = @_;

    my @lns = defined $content ? split(/[\r\n]/, $content) : ();

    # Only touch the disk for real files; "__ZIP" is a marker, not a path.
    if ($file ne "__ZIP") {
        if (open(my $log_fh, '<', $file)) {
            push @lns, <$log_fh>;
            close($log_fh);
        } else {
            warn "cannot open log file $file: $!\n";
        }
    }

    # checkIPRanges may leave this loop iteration with a non-local "next"
    # when it is handed something that does not look like an IP address.
    for my $line (@lns) {
        if ($line =~ /\t/) {
            my @tmp = split(/\t/, $line);
            next if (datecompare($tmp[0]) == -1);
            next if ($tmp[15] =~ /debug/);
            print "$mode -- $tmp[16] \n" if $debug > 3;

            # NOTE(review): the result of checkIPRanges is not assigned here,
            # so $ipt is always "" for application log records (the web-log
            # branches below do use the result). Kept as found - confirm
            # whether the mapped name should be passed to the loggers.
            my $ipt = "";
            if ($tmp[4] =~ /^UNKNOWN$/i) {
                checkIPRanges($tmp[5]);
            } else {
                checkIPRanges($tmp[4]);
            }

            $qlog->add($line, $ipt) if ($mode =~ /query/i && $tmp[1] =~ /query/i);
            $alog->add($line, $ipt) if ($mode =~ /access/i && $tmp[16] =~ /success/ && $tmp[1] =~ /access/i);
            $elog->add($tmp[14], $ipt) if ($mode =~ /export/i && $tmp[16] =~ /export/i);
            next;
        }

        ### web-server log lines
        next if $mode !~ /image/i;
        print "found weblog\n" if $debug > 2;

        if ($line =~ /^(\d\d\d\d-\d\d-\d\d\s)(.+)\s(\d+)\.(\d+)\.(\d+)\.(\d+)\s(.+)GET(.+)\s200/) {
            # IIS layout: date first, then the address, status 200 at the end
            print "found IIS\n" if $debug > 2;
            my ($stamp, $ip_, $url) = ($1, "$3\.$4\.$5\.$6", $8);
            next if (datecompare($stamp) == -1);
            next if $url !~ /user/;
            my $ipt = checkIPRanges($ip_);
            $weblog->add($url, $ipt);
        } elsif ($line =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)\s(.+)\[(.+)\]\s\"GET\s(.+)\sHTTP(.+)/) {
            # Apache layout: address first, date in square brackets
            my ($ip_, $stamp, $url) = ("$1\.$2\.$3\.$4", $6, $7);
            print "APACHE $stamp $url" if $debug > 2;
            next if (datecompare($stamp) == -1);
            next if $url !~ /user/;
            my $ipt = checkIPRanges($ip_);
            #print "adding";
            $weblog->add($url, $ipt);
        }
    }
}



# datecompare($date)
#
# Decides whether a log timestamp falls inside the reporting window held in
# the file-level $datestart / $dateend.
#   $date - timestamp text, handed to parsedate
# Returns 1 when the line should be kept, -1 when it should be dropped:
#   - no window configured at all (both boundaries empty): always 1
#   - parsedate's second return value is non-empty (treated here as an
#     error), or the parsed time lies before $datestart or after $dateend: -1
sub datecompare {
    my ($stamp) = @_;
    my ($when, $problem) = parsedate($stamp);

    if ($datestart eq "" && $dateend eq "") {
        return 1;
    }

    my $outside = ($problem ne "" || $when < $datestart || $when > $dateend);
    return $outside ? -1 : 1;

};

# Empty placeholder: the body contains no statements, and nothing in the
# visible part of this file calls it.
sub addExport {

}


# getLogDirs($dirs, $suffix)
#
# Lists the log files found directly inside a set of directories.
#   $dirs   - directory names separated by TABs; each one must end in a path
#             separator because entry names are appended without one
#   $suffix - comma separated list of name endings (whitespace is ignored);
#             the list is turned into a regex alternation
# Returns a reference to an array of paths (directory . entry name), in
# directory order. Entries starting with a dot are skipped. A name that is
# not a directory only produces a warning.
sub getLogDirs {
    my ($dirs, $suffix) = @_;

    # "txt, xml,log" -> "txt|xml|log"
    $suffix =~ s/\s//ig;
    $suffix =~ s/\,/\|/ig;

    my @found;
    for my $dir (split(/\t/, $dirs)) {
        if (!-d $dir) {
            warn "cannot open dir $dir\n";
            next;
        }

        opendir(DIR, $dir) or warn "cannot open dir $dir\n";
        my @entries = readdir(DIR);
        closedir(DIR);

        push @found,
            grep { -e $_ && $_ =~ /($suffix)$/ }    # existing + wanted ending
            map  { $dir . $_ }                      # entry name -> path
            grep { $_ !~ /^\./ }                    # no dot entries
            @entries;
    }

    #print @found;
    return \@found;
};

# checkIPRanges($ip)
#
# Maps an address to the name configured for it through the map= parameters
# (format "<range>:<name>", e.g. 131.229.219.0-131.229.219.255:TEST).
#   $ip - address text; a trailing "(...)" part is ignored
# Returns the name of the first map= entry whose range overlaps the address,
# or undef when no entry matches.
#
# Results - including "no match" - are remembered in %knownRanges under the
# cleaned-up address, so every distinct address is compared against the
# configured ranges only once per run. (Before, the cache was consulted with
# the raw text but filled with the cleaned-up one, and misses were never
# stored.)
sub checkIPRanges {
    my $ip = shift;

    # NOTE(review): this "next" is not inside a loop of this sub. It leaves
    # the sub and continues the loop of the CALLER (the line loop in
    # processFile), which therefore skips log lines without a usable address.
    # Kept because processFile relies on it; calling this sub from outside a
    # loop would be a runtime error.
    next if $ip !~ /(\d)+\.(\d)+\.(\d)+\.(\d)+/;
#   print "valid IP - $ip\r\n";

    # remove gateway (1) stuff: keep only the text before the first "("
    my $paren = index($ip, "(");
    $ip = substr($ip, 0, $paren) if $paren != -1;
#   print "valid IP - $ip\r\n";

    if (exists $knownRanges{$ip}) {
        print "\t - already seen $ip\r\n" if $debug > 2;
        return $knownRanges{$ip};
    }

    $ipv->set($ip);

    my $mapped;
    for my $vls ($query->param("map")) {
        my ($vl1, $vl2) = split(/\:/, $vls);
        $ipr->set($vl1);
        my $t = $ipv->overlaps($ipr);
#       print "\t$t -> $vl1\r\n";
        if (defined $t && $t != 0) {
            print "_____found match for $ip -- $vls\r\n" if $debug > 2;
            $mapped = $vl2;
            last;
        }
    }

    $knownRanges{$ip} = $mapped;
    return $mapped;
}