sub split_on_comma()

in data/ami/ami_split_segments.pl [45:92]


sub split_on_comma {

   my ($text, $comma_times, $btime, $etime, $max_words_per_seg)= @_;
   my %comma_hash = %$comma_times;

   print "Btime, Etime : $btime, $etime\n";

   my $stime = ($etime+$btime)/2; #split time
   my $skey = "";
   my $otime = $btime;
   foreach my $k (sort {$comma_hash{$a} cmp $comma_hash{$b} } keys %comma_hash) {
      print "Key : $k : $comma_hash{$k}\n";
      my $ktime = $comma_hash{$k};
      if ($ktime==$btime) { next; }
      if ($ktime==$etime) { last; }
      if (abs($stime-$ktime)/2<abs($stime-$otime)/2) {
         $otime = $ktime;
         $skey = $k;
      }
   }

   my %transcripts = ();

   if (!($skey =~ /[\,][0-9]+/)) {
      print "Cannot split into less than $max_words_per_seg words! Leaving : $text\n";
      $transcripts{get_name($btime, $etime)}=$text;
      return %transcripts;
   }

   print "Splitting $text on $skey at time $otime (stime is $stime)\n";
   my @utts1 = split(/$skey\s+/, $text);
   for (my $i=0; $i<=$#utts1; $i++) {
     my $st = $btime;
     my $et = $comma_hash{$skey};
     if ($i>0) {
        $st=$comma_hash{$skey};
        $et = $etime;
     }
     my (@utts) = split (' ', $utts1[$i]);
     if ($#utts < $max_words_per_seg) {
        my $nm = get_name($st, $et);
        print "SplittedOnComma[$i]: $nm : $utts1[$i]\n";
        $transcripts{$nm} = $utts1[$i];
     } else {
        print 'Continue splitting!';
        my %transcripts2 = split_on_comma($utts1[$i], \%comma_hash, $st, $et, $max_words_per_seg);
        %transcripts = merge_hashes(\%transcripts, \%transcripts2);
     }