#!/usr/bin/perl -w
# MetaphoneSuggest - suggest links for typographical and other errors from 404s
use strict;
use CGI::Pretty ':standard';	#standard cgi stuff
use Text::Metaphone;

my @suggestLinks = (); # suggested link list
my %mt = ();           # filename, score, metaphone code hash

my $origLink = substr($ENV{REDIRECT_URL},1); # remove leading /
$origLink  =~ s/\.html//g;                   # remove trailing .html

open(MPH,'metaphonesScore.txt') or die "can't open metaphonesScore.txt";
  while(my @slPart = split '###', <MPH>)
  {
    $slPart[0] =~ s/ //g; #remove trailing space
    $mt{$slPart[0]}{ score } = $slPart[1];
    $mt{$slPart[0]}{ metaphones } = $slPart[2];
  }
close(MPH);

push @suggestLinks, sortResults( directorySplitTest( $origLink ) );
push @suggestLinks, sortResults( combinedTest( $origLink ) );
push @suggestLinks, sortResults( containsTest( $origLink ) );

# from the book - unique-ify the array
my %seen = ();
@suggestLinks = grep{ ! $seen{$_}++ } @suggestLinks ;

print header;
print qq{Error 404: The file requested [$ENV{REDIRECT_URL}] is unavailable.<BR>};
next if( @suggestLinks == 0 );

print qq{Please try one of the following pages:<BR>};
for my $link( @suggestLinks ){
  $link = substr($link,index($link,'./')+1);
  print qq{<a href="$link">$link</a><BR>};
}

## end main program, begin subroutines

sub sortResults
{
  my @scored = @_;
  my @idx = (); #temporary index for sorting
  for my $entry( @scored ){
    # create an index of scores
    my $item =  substr($entry,0,index($entry,'##'));
    push @idx, $item;
  }
  
  # sort the index of scores
  my @sorted = @scored[ sort { $idx[$b] <=> $idx[$a] } 0 .. $#idx ];

  return( @sorted );

}#sortResults

sub containsTest
{
  my @matchRes = ();
  my $inLink = $_[0];
  for my $fileName ( keys %mt )
  {
    my $inLinkMeta = Metaphone($inLink);

    my $metaList =  $mt{$fileName}{metaphones};

    next if( $metaList !~ /$inLinkMeta/i );
    push @matchRes, "$mt{$fileName}{score} ## $fileName";
  }#for filename keys in metaphone hash
  return(@matchRes); 

}#containsTest

sub combinedTest
{
  my @matchRes = ();
  my $inLink = $_[0];
  for my $fileName ( keys %mt )
  {
    my $inLinkMeta = Metaphone($inLink);

    # smoosh all of the keys together, removing spaces and trailing newline
    my $metaList =  $mt{$fileName}{metaphones};
    $metaList =~ s/( |\n)//g;

    next if( $metaList !~ /(\b$inLinkMeta\b)/i );
    push @matchRes, "$mt{$fileName}{score} ## $fileName";
  }#for filename keys in metaphone hash

  return(@matchRes); 

}#combinedTest

sub directorySplitTest
{
  my @matchRes = ();
  my $inLink = $_[0];
  for my $fileName ( keys %mt )
  {
    my @inLinkMetas = ();
    for my $inP ( split '\/', $inLink ){ push @inLinkMetas, Metaphone($inP) }

    my @metaList = split ' ', $mt{$fileName}{metaphones};
    next if( @metaList != @inLinkMetas );

    my $pos = 0;
    my $totalMatch = 0;
    for( @metaList )
    {
      $totalMatch++ if( $metaList[$pos] =~ /(\b$inLinkMetas[$pos]\b)/i );
      $pos++;
    }#for meatlist

    next if( $totalMatch != @metaList );
    push @matchRes, "$mt{$fileName}{score} ## $fileName";
  
  }#for keys in metaphone hash

  return( @matchRes );

}#directorySplitTest
