package Perlbox::Vocabulary::VocabularyAdd;

#==============================================================================
#=== License
#==============================================================================
# Copyright (c) Date Fri Nov  8 21:11:38 MST 2002
# Author Shane Mason <me@perlbox.org>

#This file is part of Perlbox Voice.

#Perlbox Voice is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or
#(at your option) any later version.

#Perlbox Voice is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details.

#You should have received a copy of the GNU General Public License
#along with Perlbox Voice; if not, write to the Free Software
#Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


require 5.002;
use strict;

use constant TRUE  =>1;
use constant FALSE =>0;
use Cwd;
use Perlbox::ThirdParty::Config::Simple;


my $config;

# Constructor. Any key/value pairs passed after the class name become
# fields of the new object; init() is then invoked to load the
# configuration before the object is handed back.
sub new {
    my ( $class, @fields ) = @_;

    my $self = bless( { @fields }, $class );
    $self->init();

    return $self;
}

# Loads Listener.conf from the .perlbox-voice directory under $HOME
# into the file-level $config variable used by the other methods.
sub init {
     my ($self) = @_;

     my $listener_conf = $ENV{"HOME"} . "/.perlbox-voice/Listener.conf";
     $config = Perlbox::ThirdParty::Config::Simple->new($listener_conf);
}

###############################################################
# sub create_language_model() builds a new sentence file, language
# model and pronunciation dictionary from a list of phrases and then
# installs them at the paths named in the configuration
# (path_section.sent, path_section.language_model, path_section.dict).
# The previously installed files are kept with a ".save" suffix.
#
# Arguments (exactly three, counting the invocant):
#   $self          - the VocabularyAdd object
#   $phrase_array  - reference to an array of phrase strings
#   $execute_array - required by the argument-count check, but not
#                    otherwise used by this method
#
# Returns TRUE on success. On failure a warning is emitted and the
# string "COULD NOT CREATE MODEL\n" is returned; note that this string
# is itself true in boolean context, so compare against TRUE.
#
# External commands are run without a shell, so every path is passed
# verbatim (no "~" or wildcard expansion takes place).
#
# Understand, this is a hack, but it works for now.
###############################################################
sub create_language_model{
   my $failure = "COULD NOT CREATE MODEL\n";

   if(@_ != 3){
      warn "New language model could not be created: not enough arguments at Perlbox::Vocabulary::VocabularyAdd->create_language_model()\n";
      return $failure;
   }
   my ($self, $phrase_array) = @_;

   #now here we make the language model

   my $pblibpath = $config->param("path_section.perlbox_lib");
   #arguments for quick_lm.pl
   my $bin_path = $pblibpath."/bin/quick_lm.pl";
   my $CMU_DICT = $pblibpath."/lib/cmudict_sphinx";
   my $input_sent = $ENV{"HOME"}."/.perlbox-voice/tmp/current.sent";
   my $output_lm = $ENV{"HOME"}."/.perlbox-voice/tmp/current.lm";
   my $output_dict = $ENV{"HOME"}."/.perlbox-voice/tmp/current.dict";

   #first write the sentence file, one "<s> phrase </s>" line per phrase
   my $sent_fh;
   unless(open($sent_fh, '>', $input_sent)){
      warn "Could not open sentence file $input_sent for writing: $!\n";
      return $failure;
   }
   foreach my $phrase (@$phrase_array){
      print {$sent_fh} "<s> " . $phrase . " </s>\n";
   }
   unless(close($sent_fh)){
      # buffered write errors only show up when the handle is closed
      warn "Could not finish writing sentence file $input_sent: $!\n";
      return $failure;
   }

   #run quick_lm.pl (list form: no shell involved) and echo its output
   my $lm_fh;
   unless(open($lm_fh, '-|', 'perl', $bin_path, '-s', $input_sent, '-o', $output_lm)){
      warn "Could not run $bin_path: $!\n";
      return $failure;
   }
   while(my $line = <$lm_fh>){
      print $line;
   }
   # Only warn here: if no model was produced, installing it below
   # fails and is reported there.
   close($lm_fh)
      or warn "$bin_path did not finish cleanly (exit status $?)\n";

   #now create a dictionary:

   #read the big dictionary into memory
   my $dict_fh;
   unless(open($dict_fh, '<', $CMU_DICT)){
      warn "Could not open dictionary $CMU_DICT: $!\n";
      return $failure;
   }
   my @dict = <$dict_fh>;
   close($dict_fh);

   #now look for pronunciations in the big dictionary
   my $dout_fh;
   unless(open($dout_fh, '>', $output_dict)){
      warn "Could not open dictionary file $output_dict for writing: $!\n";
      return $failure;
   }

   #added Nov 15 2003: Shane C. Mason
   #if there are multiple words in the text (like a phrase), we need to add
   #each of them, but only once, because identical entries cause a hash
   #error when sphinx loads them
   my %already_added;
   foreach my $phrase (@$phrase_array){
      my $text = $phrase;
      chomp $text;
      foreach my $thisword (split(" ", uc($text))){
         next if $already_added{$thisword}++;

         #match "WORD" followed by whitespace, or "WORD(" for an
         #alternate pronunciation; \Q..\E keeps the word literal
         my $entry_re = qr/^\Q$thisword\E[\s(]/;
         foreach my $entry (@dict){
            print {$dout_fh} $entry if $entry =~ $entry_re;
         }
      }
   }

   unless(close($dout_fh)){
      warn "Could not finish writing dictionary file $output_dict: $!\n";
      return $failure;
   }

   #now move the files:
   #scalar assignment keeps param() in scalar context, as the original
   #string concatenation did
   my $dict_path = $config->param("path_section.dict");
   my $lm_path = $config->param("path_section.language_model");
   my $sent_path = $config->param("path_section.sent");

   #keep the currently installed files as *.save (best effort: on a
   #first run there is nothing to back up)
   foreach my $installed ($dict_path, $lm_path, $sent_path){
      next unless defined $installed and -e $installed;
      system("mv", $installed, $installed.".save");
   }

   #install the new files; attempt all three even if one fails
   my $all_moved = 1;
   my @installs = (
      [ $output_dict, $dict_path ],
      [ $output_lm,   $lm_path   ],
      [ $input_sent,  $sent_path ],
   );
   foreach my $install (@installs){
      my ($from, $to) = @$install;
      if(!defined $to or system("mv", $from, $to) != 0){
         warn "Could not move $from into place\n";
         $all_moved = 0;
      }
   }
   return $failure unless $all_moved;

   return TRUE;

 }



###############################################################
# sub validate_entry() tests a user input line for correctness
# with our model.
#
# Arguments:
#   $self       - the VocabularyAdd object (not used)
#   $input_line - the text entered by the user
#
# Returns TRUE if safe, FALSE if not. A line is safe when, after
# lower-casing, it contains only the letters a-z and whitespace and
# has at least one letter. Undefined input is not safe.
###############################################################
sub validate_entry{
    my ($self, $input_line) = @_;

    return FALSE unless defined $input_line;

    #compare case-insensitively by lower-casing first
    $input_line = lc($input_line);

    #reject anything that is not a letter or whitespace
    return FALSE if $input_line =~ /[^a-z\s]/;

    #reject empty or whitespace-only input
    return FALSE unless $input_line =~ /[a-z]/;

    return TRUE;
}


1;
=head1 NAME

 Perlbox::Vocabulary::VocabularyAdd - builds the language model and dictionary files for Perlbox Voice


=head1 VERSION

 This document refers to version 0.01 of Perlbox Voice Application Framework,
 release 3 18 2003

=head1 SYNOPSIS

 This module is interfaced from Perlbox::VoiceServer.

=head1 AUTHOR

 Shane C. Mason <me@perlbox.org>

 Special thanks to: Eric Andrechek (eric at openthought dot net)

 http://perlbox.org

=head1 COPYRIGHT

 Copyright (c) Date Fri Nov  8 21:11:38 MST 2002
 Author Shane Mason <me@perlbox.org>

 This file is part of Perlbox Voice.

 Perlbox Voice is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 Perlbox Voice is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

=cut



