package MKDoc::Stemmer;
use Text::Unidecode;
use strict;
use warnings;


# split ($text)
# -------------
# Breaks $text into a list of unique, upper-cased index terms.
#
# The text is transliterated with Text::Unidecode, cut on non-word
# characters, upper-cased and reduced to the characters A-Z and 0-9. A single
# trailing 'S' that directly follows a vowel is then removed
# ("HOUSES" -> "HOUSE"), unless that would leave fewer than three characters
# ("BUS" stays "BUS"). Terms shorter than three characters are discarded.
#
# An undefined or empty $text yields no terms.
#
# Returns the terms as a list in list context and as an array reference in
# scalar context. The order of the terms is unspecified (hash key order).
sub split
{
    my $text = shift;
    $text = '' unless defined $text;
    $text = unidecode ($text) || '';

    my %seen;

    # CORE::split is spelled out because this sub shares the builtin's name.
    # Newlines are non-word characters, so they separate terms like any other
    # punctuation and need no special treatment beforehand.
    foreach my $chunk (CORE::split (/\W/, $text))
    {
        my $term = uc $chunk;

        # Remove whatever \w lets through that is not alphanumeric, e.g. '_'.
        $term =~ tr/A-Z0-9//cd;

        # No term shorter than three characters is ever returned, and
        # stemming can only shorten a term, so short ones are skipped here.
        next if length ($term) < 3;

        # Strip a plural 'S' that follows a vowel. The pattern used to end in
        # '^', which can never match after a character, so nothing was ever
        # stripped; it is now anchored at the end of the term.
        (my $stem = $term) =~ s/([AEIOU])S\z/$1/;

        # Fall back to the unstemmed term when the stem would be too short.
        $seen{ length ($stem) < 3 ? $term : $stem } = 1;
    }

    my @res = keys %seen;
    return wantarray ? @res : \@res;
}


1;


__END__
