#!/usr/bin/perl -w use strict; ################################# # History: # # version 07 # * modified to allow input of non-positive durations (which are discarded) # * fixed bug in inferring file end from end of last reference time # # version 06 # modified to limit evaluation to only the time up to the last speech recorded # in the reference segmentation files. This prevents test system output from # artificially manipulating the evaluation score. # # version 05 # don't extend no-score zone beyond the last speaker event # # version 04 # * account for (correct) same-speaker overlap # * a few cosmetic fixes # # version 03 # * checked for undefined variables before math operations # * added optional output of speaker mapping information # # version 02 # * modified to read files from a list # * corrected model parameter selection for computing default error # * made inhibition of scoring on simultanous ref speakers an option # # version 01 # modified to inhibit scoring when multiple reference speakers are speaking # # version 00 # ################################# ###### # Intro my ($date, $time) = date_time_stamp(); print "Speaker Segmentation Evaluation -- run on $date at $time\n"; print "command line: ", $0, " ", join(" ", @ARGV), "\n"; my $usage = "\n\nUsage: $0 [-h] -r -t -c \n\n". "Description: This Perl script evaluates speaker segmentation performance.\n". "\n". "Required arguments:\n". " -r is a file containing a list of reference segmentation files\n". " -t is a file containing a list of segmentation files to be evaluated\n". "\n". "Optional arguments:\n". " -c is the no-score zone around reference segment boundaries\n". " (Segmentation output is not evaluated within +/- collar seconds of\n". " a reference segmentation boundary.) Default value is 0.25 seconds\n". " -s inhibits scoring when multiple reference speakers are speaking\n". " -e prints event sequence for each segmentation data file\n". " -f prints performance summary for each segmentation data file\n". " -m prints speaker mapping details for each segmentation data file\n". " -h prints this help message to STDOUT\n". "\n"; use vars qw ($opt_r $opt_t $opt_c $opt_s $opt_e $opt_f $opt_m $opt_h); use Getopt::Std; getopts ('r:t:c:sefmh'); die $usage if defined($opt_h); die "Error in command line: ref_list not defined$usage" unless defined $opt_r; die "Error in command line: tst_list not defined$usage" unless defined $opt_t; my $collar = defined($opt_c) ? $opt_c : 0.25; die "Collar ('$collar') must be non-negative$usage" unless $collar >= 0; my ($ref_list) = $opt_r; my ($tst_list) = $opt_t; ###### # Global variables my (%events); # integrated chronological sequences of events, ref and tst integrated my (%speaker_data); # speaker status and mapping data my (%seg_stats, %cum_stats); # the speaker segmentation performance statistics my (%match_time); # the table of cumulative time of match between ref and tst spkrs my $max_match; # max total cumulative time of match between mapped ref and tst spkrs my %C_model = (MISSED_SPEECH => 1, FALARM_SPEECH => 1, MISSED_SPEAKER => 1, FALARM_SPEAKER => 1, SPEAKER_ERROR => 1); ###### my ($ref_file, $tst_file); open (REF_LIST, $ref_list) or die "\nUnable to open ref file list '$ref_list'", $usage; open (TST_LIST, $tst_list) or die "\nUnable to open tst file list '$tst_list'", $usage; while ($ref_file=) { chomp $ref_file; $tst_file=, chomp $tst_file; undef %events; undef %speaker_data; get_segmentation_data ("REF", $ref_file); get_segmentation_data ("TST", $tst_file); add_no_scoring_zones (); print_events () if $opt_e; map_speakers (); print_speaker_map () if $opt_m; compute_error_statistics (); summarize_performance (\%seg_stats) if $opt_f; } print "Summary performance for all data\n"; summarize_performance (\%cum_stats); close (REF_LIST); close (TST_LIST); exit 0; ################################# sub get_segmentation_data { my ($type, $file) = @_; my ($tbeg, $tend, $spkr, %end_time); print "$type file = '$file'\n" if $opt_f; open (FILE, $file) or die "\nUnable to open segmentation data file '$file'", $usage; while () { next if /^\s*\#/; ($tbeg, $tend, $spkr) = split; next unless defined $spkr; die "\nNegative segment start time ('$tbeg') for speaker '$spkr' in file '$file'$usage" if $tbeg < 0; die "\nDuplicate segment start time ('$tbeg') for speaker '$spkr' in file '$file'$usage" if defined $end_time{$spkr}{$tbeg}; $end_time{$spkr}{$tbeg} = $tend if $tend > $tbeg; } #eliminate segment overlap within speakers foreach $spkr (keys %end_time) { $tend = 0; foreach $tbeg (sort {$a<=>$b} keys %{$end_time{$spkr}}) { if ($tbeg >= $tend) { $events{$tend}{$type}{$spkr} = "STOP" if $tend > 0; $events{$tbeg}{$type}{$spkr} = "START"; } $tend = max($tend, $end_time{$spkr}{$tbeg}); } $events{$tend}{$type}{$spkr} = "STOP"; } close (FILE); } ################################# sub add_no_scoring_zones { my ($time, $tbeg, $tend, $tend_ref, %end_time); foreach $time (keys %events) { next unless defined $events{$time}{REF}; $end_time{$time-$collar} = $time+$collar; $tend_ref = $time; } $tend = 0; foreach $tbeg (sort {$a<=>$b} keys %end_time) { if ($tbeg > $tend) { $events{$tend}{SCORE} = defined $events{$tend}{SCORE} ? undef : "START"; $events{$tbeg}{SCORE} = "STOP"; } $tend = max($tend, $end_time{$tbeg}); } $events{$tend_ref}{SCORE} = "STOP"; } ################################# sub compute_error_statistics { my ($event, $time, $tnext); my ($spkr, $ref_spkr, $tst_spkr, $score, $ref_speakers, $tst_speakers, $seg_dur); #initialize speaker status foreach $ref_spkr (keys %{$speaker_data{REF}}) { delete $speaker_data{REF}{$ref_spkr}{SPEAKING}; $tst_spkr = $speaker_data{REF}{$ref_spkr}{MAP}; $speaker_data{TST}{$tst_spkr}{MAP} = $ref_spkr if defined $tst_spkr; } foreach $tst_spkr (keys %{$speaker_data{TST}}) { delete $speaker_data{TST}{$tst_spkr}{SPEAKING}; } undef %seg_stats; $time = 0; foreach $tnext (sort {$a<=>$b} keys %events) { $seg_dur = $tnext - $time; $seg_stats{TOTAL_TIME} = $tnext if defined $events{$tnext}{REF}; $seg_stats{TOTAL_SPEECH} += $seg_dur if $ref_speakers; #compute scores for this segment {if ($score) { next if $opt_s and $ref_speakers >= 2; $seg_stats{SCORED_TIME} += $seg_dur; $seg_stats{SCORED_SPEECH} += $seg_dur if $ref_speakers; $seg_stats{MISSED_SPEECH} += $seg_dur if $ref_speakers and not $tst_speakers; $seg_stats{FALARM_SPEECH} += $seg_dur if $tst_speakers and not $ref_speakers; foreach $ref_spkr (keys %{$speaker_data{REF}}) { if ($speaker_data{REF}{$ref_spkr}{SPEAKING}) { $speaker_data{REF}{$ref_spkr}{SCORED_SPEECH} += $seg_dur; $seg_stats{SCORED_SPEAKER} += $seg_dur; next unless $tst_speakers; $tst_spkr = $speaker_data{REF}{$ref_spkr}{MAP}; if (defined $tst_spkr) { $seg_stats{SPEAKER_ERROR} += $seg_dur unless $speaker_data{TST}{$tst_spkr}{SPEAKING}; } else { $seg_stats{MISSED_SPEAKER} += $seg_dur; } } } foreach $tst_spkr (keys %{$speaker_data{TST}}) { if ($speaker_data{TST}{$tst_spkr}{SPEAKING}) { $ref_spkr = $speaker_data{TST}{$tst_spkr}{MAP}; $seg_stats{FALARM_SPEAKER} += $seg_dur unless defined $ref_spkr; } } }} #compute the characteristics of the next segment $event = $events{$time=$tnext}; $score = $event->{SCORE} eq "START" if defined $event->{SCORE}; if (defined $event->{REF}) { foreach $spkr (keys %{$event->{REF}}) { $speaker_data{REF}{$spkr}{SPEAKING} = $event->{REF}{$spkr} eq "START"; } } if (defined $event->{TST}) { foreach $spkr (keys %{$event->{TST}}) { $speaker_data{TST}{$spkr}{SPEAKING} = $event->{TST}{$spkr} eq "START"; } } $ref_speakers = speakers_speaking ($speaker_data{REF}); $tst_speakers = speakers_speaking ($speaker_data{TST}); } #accumulate statistics foreach my $type (keys %seg_stats) { $cum_stats{$type} += $seg_stats{$type}; } } ################################# sub speakers_speaking { my ($spkr_data) = @_; my $speakers = 0; foreach my $spkr (keys %{$spkr_data}) { $speakers++ if $spkr_data->{$spkr}{SPEAKING}; } return $speakers; } ################################# sub summarize_performance { my ($stats) = @_; my (%prob, %cost, $type, $default_cost, @spkr_times, $total_cost); foreach $type ("MISSED_SPEECH", "FALARM_SPEECH") { $stats->{$type} = 0 unless defined $stats->{$type}; $prob{$type} = $stats->{$type}/$stats->{SCORED_SPEECH}; $cost{$type} = $C_model{$type}*$prob{$type}; } foreach $type ("MISSED_SPEAKER", "FALARM_SPEAKER", "SPEAKER_ERROR") { $stats->{$type} = 0 unless defined $stats->{$type}; $prob{$type} = $stats->{$type}/$stats->{SCORED_SPEAKER}; $cost{$type} = $C_model{$type}*$prob{$type}; } foreach my $spkr (sort keys %{$speaker_data{REF}}) { push @spkr_times, $speaker_data{REF}{$spkr}{SCORED_SPEECH}; } $default_cost = $C_model{SPEAKER_ERROR}*(1 - max(@spkr_times)/$stats->{SCORED_SPEAKER}); $default_cost = max($default_cost,0.001); printf " TOTAL TIME =%8.2f secs\n", $stats->{TOTAL_TIME}; printf " TOTAL SPEECH =%8.2f secs (%5.1f percent of total time)\n", $stats->{TOTAL_SPEECH}, 100*$stats->{TOTAL_SPEECH}/$stats->{TOTAL_TIME}; printf "SCORED SPEECH =%8.2f secs (%5.1f percent of total speech)\n", $stats->{SCORED_SPEECH}, 100*$stats->{SCORED_SPEECH}/$stats->{TOTAL_SPEECH}; printf "MISSED SPEECH =%8.2f secs (%5.1f percent of scored speech), cost =%7.4f (%5.1f percent of default)\n", $stats->{MISSED_SPEECH}, 100*$prob{MISSED_SPEECH}, $cost{MISSED_SPEECH}, 100*$cost{MISSED_SPEECH}/$default_cost; printf "FALARM SPEECH =%8.2f secs (%5.1f percent of scored speech), cost =%7.4f (%5.1f percent of default)\n", $stats->{FALARM_SPEECH}, 100*$prob{FALARM_SPEECH}, $cost{FALARM_SPEECH}, 100*$cost{FALARM_SPEECH}/$default_cost; printf "SCORED SPEAKER TIME =%8.2f secs (%5.1f percent of scored speech)\n", $stats->{SCORED_SPEAKER}, 100*$stats->{SCORED_SPEAKER}/$stats->{SCORED_SPEECH}; printf "MISSED SPEAKER TIME =%8.2f secs (%5.1f percent of scored speaker time), cost =%7.4f (%5.1f percent of default)\n", $stats->{MISSED_SPEAKER}, 100*$prob{MISSED_SPEAKER}, $cost{MISSED_SPEAKER}, 100*$cost{MISSED_SPEAKER}/$default_cost; printf "FALARM SPEAKER TIME =%8.2f secs (%5.1f percent of scored speaker time), cost =%7.4f (%5.1f percent of default)\n", $stats->{FALARM_SPEAKER}, 100*$prob{FALARM_SPEAKER}, $cost{FALARM_SPEAKER}, 100*$cost{FALARM_SPEAKER}/$default_cost; printf " SPEAKER ERROR TIME =%8.2f secs (%5.1f percent of scored speaker time), cost =%7.4f (%5.1f percent of default)\n", $stats->{SPEAKER_ERROR}, 100*$prob{SPEAKER_ERROR}, $cost{SPEAKER_ERROR}, 100*$cost{SPEAKER_ERROR}/$default_cost; foreach $type ("MISSED_SPEECH", "FALARM_SPEECH", "MISSED_SPEAKER", "FALARM_SPEAKER", "SPEAKER_ERROR") { $total_cost += $cost{$type}; } printf "%79.79s =%7.4f (%5.1f percent of default)\n", "TOTAL COST", $total_cost, 100*$total_cost/$default_cost; } ################################# sub map_speakers { my ($event, $time, $tnext); my ($spkr, $ref_spkr, $tst_spkr, $score); #compute the cumulative match time between ref and tst speakers undef %match_time; $time = 0; foreach $tnext (sort {$a<=>$b} keys %events) { if ($score) { foreach $ref_spkr (keys %{$speaker_data{REF}}) { next unless $speaker_data{REF}{$ref_spkr}{SPEAKING}; foreach $tst_spkr (keys %{$speaker_data{TST}}) { next unless $speaker_data{TST}{$tst_spkr}{SPEAKING}; $match_time{$ref_spkr}{$tst_spkr} += $tnext-$time; } } } $event = $events{$tnext}; $score = $event->{SCORE} eq "START" if defined $event->{SCORE}; if (defined $event->{REF}) { foreach $spkr (keys %{$event->{REF}}) { $speaker_data{REF}{$spkr}{SPEAKING} = $event->{REF}{$spkr} eq "START"; } } if (defined $event->{TST}) { foreach $spkr (keys %{$event->{TST}}) { $speaker_data{TST}{$spkr}{SPEAKING} = $event->{TST}{$spkr} eq "START"; } } $time = $tnext; } #find the mapping that maximizes the cumulative match time between ref and tst spkrs my @ref_speakers = keys %{$speaker_data{REF}}; $max_match = -1; best_match (\@ref_speakers, 0); } ################################# sub best_match { my ($ref_spkrs, $cum_match) = @_; my ($ref_spkr, $tst_spkr); my (@ref_remaining) = @$ref_spkrs; if (defined ($ref_spkr = shift @ref_remaining)) { best_match (\@ref_remaining, $cum_match+0.001); # match if this ref unmapped foreach $tst_spkr (keys %{$speaker_data{TST}}) { next if defined $speaker_data{TST}{$tst_spkr}{MAP}; $speaker_data{TST}{$tst_spkr}{MAP} = $ref_spkr; my $cand_match = defined $match_time{$ref_spkr}{$tst_spkr} ? $match_time{$ref_spkr}{$tst_spkr} : 0; best_match (\@ref_remaining, $cum_match+$cand_match); delete $speaker_data{TST}{$tst_spkr}{MAP}; } } elsif ($cum_match > $max_match) { $max_match = $cum_match; foreach $ref_spkr (keys %{$speaker_data{REF}}) { delete $speaker_data{REF}{$ref_spkr}{MAP}; } foreach $tst_spkr (keys %{$speaker_data{TST}}) { next unless defined $speaker_data{TST}{$tst_spkr}{MAP}; $ref_spkr = $speaker_data{TST}{$tst_spkr}{MAP}; $speaker_data{REF}{$ref_spkr}{MAP} = $tst_spkr; } } } ################################# sub print_speaker_map { my ($ref_spkr, $tst_spkr); foreach $ref_spkr (keys %{$speaker_data{REF}}) { $tst_spkr = $speaker_data{REF}{$ref_spkr}{MAP}; print "'$ref_spkr' => ", defined $tst_spkr ? "'$tst_spkr'\n" : "\n"; foreach $tst_spkr (keys %{$speaker_data{TST}}) { $time = $match_time{$ref_spkr}{$tst_spkr}; printf "%9.2f secs matched to '$tst_spkr'\n", defined $time ? $time : 0; } } } ################################# sub print_events { foreach my $time (sort {$a<=>$b} keys %events) { print "$time, SCORE, $events{$time}{SCORE}\n" if defined $events{$time}{SCORE}; if (defined $events{$time}{REF}) { print "$time, REF"; foreach my $spkr (keys %{$events{$time}{REF}}) { print ", ($spkr $events{$time}{REF}{$spkr})"; } print "\n"; } if (defined $events{$time}{TST}) { print "$time, TST"; foreach my $spkr (keys %{$events{$time}{TST}}) { print ", ($spkr $events{$time}{TST}{$spkr})"; } print "\n"; } } } ################################# sub date_time_stamp { my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime(); my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); my ($date, $time); $time = sprintf "%2.2d:%2.2d:%2.2d", $hour, $min, $sec; $date = sprintf "%4.4s %3.3s %s", 1900+$year, $months[$mon], $mday; return ($date, $time); } ################################# sub max { my ($max, $x); $max = shift @_; foreach $x (@_) { if ($x > $max) {$max = $x} } return $max; } ################################# sub min { my ($min, $x); $min = shift @_; foreach $x (@_) { if ($x < $min) {$min = $x} } return $min; } #################################