#!/usr/local/bin/perl

#####################################################################
## Version 0.1  hp 20.7.2005 adding options for eNTERFACE/MaxBrola
##              derived from mary2mbrol.pl
#####################################################################

# Script name and version; both are interpolated into the help text below.
$PROGNAME = $0;
$VERSION  = "0.1";

# Help/usage text, emitted (via die) when -h is given.
# Kept in sync with the actual behaviour of the script:
#   * the optional positional argument is the Mary-XML-file,
#   * every sentence is introduced by a '####' header line,
#   * CR is the end-of-line sequence when no EOL switch is given.
$DOKU = "
$PROGNAME  Version: $VERSION

USAGE:
$PROGNAME (-h) (-d) (-m | -l | -w) Mary-XML-file

(e.g. see maryclient.pl for producing Mary-XML-file)

If Mary-XML-file is missing, <STDIN> will be used.

DESCRIPTION

Simple extraction of all <s> <syllable> <ph> elements and output as
one <syllable> per line.
Each sentence is preceded by a header line starting with '####'.

INPUT:
Mary-XML-file containing <ph> (phonemic) info

OPTIONS

-h    print this message
-d    keep original durations

Encoding of End-of-Line (default: CR):
-m    Mac-version:     use CR at end of line
-l    Linux-version:   use LF at end of line
-w    Windows-version: use CRLF at end of line

Hannes Pirker, August 2005
";

# Keep a copy of the raw command line: getopts() below removes the
# switches from @ARGV, and the copy is echoed in the output header line.
@args = @ARGV;
# require "$BINDIR/date.perl";

use Getopt::Std;
# Accepted switches: h t d m l w. Each one given sets the matching
# $opt_X variable.
getopts('htdmlw');

#### today's date as DD-MON-YYYY ######## thanks to jm
@monthnames = qw(JAN FEB MAR APR MAY JUN JUL AUG SEP OCT NOV DEC);
# Only day-of-month, month index (0-based) and year offset are needed.
($mday, $mon, $year) = (localtime(time))[3, 4, 5];
$month = $monthnames[$mon];
# localtime() reports the year as an offset from 1900.
$today = sprintf("%02d-%s-%04d", $mday, $month, 1900 + $year);

##############
## Infos on the perl-DOM-module:
## http://search.cpan.org/src/ENNO/libxml-enno-1.02/html/XML/DOM.html
##############
use XML::DOM;

# -h: emit the usage text and stop before any input is read.
if ($opt_h) {die $DOKU};

# End-of-line sequence appended to every output line.
# CR is the default (identical to -m). If several EOL switches are given,
# -w takes precedence over -l, which takes precedence over -m.
$NEWLINE = $opt_w ? "\r\n"
         : $opt_l ? "\n"
         :          "\r";

# Emit the chosen sequence byte-for-byte. Without this, platforms whose
# text-mode I/O layer rewrites "\n" as CRLF would turn -l into CRLF and
# -w into CR CR LF.
binmode(STDOUT);


# Build the DOM tree of the input document.
my $dom = XML::DOM::Parser->new;                    # create parser
my $doc;                                            # the dom object

# Test for a non-empty argument rather than a true one, so that a file
# literally named "0" is parsed instead of silently falling back to STDIN.
if (defined $ARGV[0] && length $ARGV[0]) {
    $infile = $ARGV[0];
    $doc    = $dom->parsefile($infile);             # create the dom object
}
else {
    $infile = "STDIN";
    $doc    = $dom->parse(\*STDIN);
}

# Output: for every <s> element a '####' header line, followed by one line
# per <syllable> listing the "p" attribute of each <ph> it contains,
# separated by single blanks. With -d each phoneme is followed by a blank
# and its "d" attribute.
for my $sentence ($doc->getElementsByTagName("s")) {
  print "#### $0 @args -- $today ####$NEWLINE";
  for my $syllable ($sentence->getElementsByTagName("syllable")) {
    my @items = map {
      my $item = $_->getAttribute("p");
      $item .= " " . $_->getAttribute("d") if $opt_d;
      # the "f0" attribute is not used
      $item;
    } $syllable->getElementsByTagName("ph");
    print join(" ", @items) . $NEWLINE;
  }
  # print "$NEWLINE";
}

