#!/usr/bin/perl -w use strict; ################################# # History: # # version 21 # * handle ctm file records where the speaker ID is "null" # # version 20 # * accept either -x or -s to exclude overlap regions # # version 19 (Greg Sanders) # * added option -g to score using speaker type as if it were # the speaker ID # * error fixes in the word-based scoring of speaker type # when -g has not been specified # * added option -n to have the word-based output exclude tokens # that are labeled as noscore in the .ctm files (parallel to STT) # * more checking to avoid divide-by-zero errors # * reorganizations of output formatting for easier reading # * problem: # Word midpoint times are events in word-based scoring and # are tracked in a hash, with the result that two words with # the same midpoint time (as can occur in an overlap region) # were not both tracked (hash entry gets clobbered). # Note that this is a very rare situation. # semi-fix: # Increase midpoint time of any duplicates of a mid-point time by # increments of 0.000001 seconds until no longer a duplicate # * a parallel problem existed in speaker-type mode, where overlapping # speakers could begin speaking simultaneously, and the same sort # of semi-fix has been made # * all word-based metrics now take account of speaker identities and # speaker types in counting word-based errors # * TOTAL_TIME from start of first token rather than from time zero # # version 18 (Greg Sanders) # * combined changes introduced in versions 16 and 17 # * fixed a problem with undefined variables from version 16 # * expanded range of numbers being tracked and printed out # * added options to score # - all except overlaps, # - overlaps only, or # - all data (the default). # (In the process, I renamed option -s to option -x) # * additional bulletproofing against divide-by-zero errors # when data being scored has no scored words or no scored time (etc.) # # version 17 # * gender analysis added # * fixed error in sub add_collar_no_scoring_zones # * Note: version 17 omits the word-based scoring option from version 16 # # version 16 # word-based scoring option added. # # version 15 # * scoring control added, using UEM file information. (Also, a change # was made so that when evaluation partitions are NOT specified, the # total time is measured from the beginning to the end of reference # speech rather than from time = 0 to the end of reference speech.) # # version 14 # overall speaker diarization error printed out # # version 13 # bug fix to accommodate speaker ID = "0" # # version 12 # * converted to the MDTM format as specified in the EARS 2003 # evaluation plan # * added branch-and-bound speed-up to the speaker mapping # # version 11 # * Errors labeled as "MISSED_SPEECH", "FALARM_SPEECH", "MISSED_SPEAKER" # "FALARM_SPEAKER", or "SPEAKER_ERROR" # # version 10 # * change in decision rule to count each speaker segment that is in error # as only one error, regardless of whether the speaker is mapped or not. # # version 09 # * change in decision rule: # - eliminate speech activity detection scoring # - simplify speaker diarization scoring to two types only: # 1 missing a speaker # 2 falsely declaring a speaker # # version 08 # * fix processing for zero collar value # # version 07 # * modified to allow input of non-positive durations (which are discarded) # * fixed bug in inferring file end from end of last reference time # # version 06 # modified to limit evaluation to only the time up to the last speech recorded # in the reference diarization files. This prevents test system output from # artificially manipulating the evaluation score. # # version 05 # don't extend no-score zone beyond the last speaker event # # version 04 # * account for (correct) same-speaker overlap # * a few cosmetic fixes # # version 03 # * checked for undefined variables before math operations # * added optional output of speaker mapping information # # version 02 # * modified to read files from a list # * corrected model parameter selection for computing default error # * made inhibition of scoring on simultanous ref speakers an option # # version 01 # modified to inhibit scoring when multiple reference speakers are speaking # # version 00 # ################################# ###### # Intro my ($date, $time) = date_time_stamp(); print "\nSpeaker Diarization Evaluation -- run on $date at $time\n"; print "\nCommand line: ", $0, " ", join(" ", @ARGV), "\n\n"; my $usage = "\n\nUsage: $0 [-h] -r -t -c \n\n". "Description: This Perl script evaluates speaker diarization performance.\n". "\n". "Required arguments:\n". " -r is a file containing the reference speaker diarization\n". " data, in MDTM format\n". " -t is a file containing the system output speaker diarization\n". " data, in MDTM format\n". "\n". "Optional arguments:\n". " -c is the no-score zone around reference segment boundaries\n". " (Diarization output is not evaluated within +/- collar seconds of\n". " a reference segment boundary.) Default value is 0.25 seconds\n". " -u is a file containing UEM times to control scoring\n". " -w is a file containing a reference transcription of the\n". " test data, in CTM format. This transcript is used for word-based\n". " scoring of the diarization, in which the errors are accrued in terms\n". " of (reference) words rather than in terms of time.\n". " -e prints event sequence for each diarization source file\n". " -f prints performance summary for each diarization source file\n". " -g runs in speaker type (gender) scoring mode.\n". " Note that this mode treats speaker type as the speaker ID.\n". " -m prints speaker mapping details for each diarization source file.\n". " -n omits, from word-based scores, any \"noscore\" tokens in .ctm file.\n". " -o scores only areas where multiple reference speakers are speaking.\n". " -x excludes areas where multiple reference speakers are speaking.\n". " Note: the default (neither -o nor -x) is to score all areas\n". " Note also that -o and -x are mutually exclusive\n". " -s exactly equivalent to -x\n". " -h prints this help message to STDOUT\n". "\n"; use vars qw ($opt_r $opt_t $opt_c $opt_u $opt_w $opt_e $opt_f $opt_g $opt_h $opt_m $opt_n $opt_o $opt_s $opt_x); use Getopt::Std; getopts ('r:t:c:u:w:efghmnosx'); die $usage if defined($opt_h); die "$usage Error in command line: cannot specify both -o and -s\n\n" if defined($opt_o) && defined($opt_s); die "$usage Error in command line: cannot specify both -o and -x\n\n" if defined($opt_o) && defined($opt_x); die "$usage Error in command line: ref_file not defined\n\n" unless defined $opt_r; die "$usage Error in command line: sys_file not defined\n\n" unless defined $opt_t; my $collar = defined($opt_c) ? $opt_c : 0.25; die "$usage Error in command line: Collar ('$collar') must be non-negative\n\n" unless $collar >= 0; my ($ref_file) = $opt_r; my ($sys_file) = $opt_t; my ($uem_file) = $opt_u; my ($ctm_file) = $opt_w; my $miss_name = " MISS"; my $fa_name = " FALSE ALARM"; ###### # Global variables my $exp_id = ""; my $inputs = ""; my $max_match; # max total cumulative time of match between mapped ref and sys spkrs my %speaker_data; # speaker status and mapping data my $best_score; my $TOTAL_TIME_START = 0.0; { my (%events); # integrated chronological sequences of events, ref and sys integrated my (%spkr_info); # information and attributes for ref and sys output speakers get_mdtm_data ("REF", $ref_file, \%events, \%spkr_info); get_mdtm_data ("SYS", $sys_file, \%events, \%spkr_info); evaluate ($uem_file, $ctm_file, \%events, \%spkr_info); } ################################# sub evaluate { my ($uem_file, $ctm_file, $events, $spkr_info) = @_; my (%seg_stats, %cum_stats); # the speaker diarization performance statistics my ($file, $chnl, $chnl_events, $chnl_spkrs, $chnl_stats, %eval_segs, %words, %time_info); get_uem_data ($uem_file, \%eval_segs); # ref data undef %words; get_ctm_data ($ctm_file, \%words); # ref data foreach $file (sort keys %$events) { foreach $chnl (sort keys %{$events->{$file}}) { undef %seg_stats; undef %speaker_data; undef %time_info; $chnl_events = $events->{$file}{$chnl}; add_collar_no_scoring_zones ($eval_segs{$file}{$chnl}, $chnl_events); add_words ($words{$file}{$chnl}, $chnl_events); if ($opt_f) { print "\n\n----------------------------------------------------------------------------\n"; print "\nPerformance for file $file, channel $chnl:\n" if $opt_f; } else { print "\n ---------------- processing file $file, channel $chnl ------------------\n" } print_events ($chnl_events) if $opt_e; map_speakers ($chnl_events, \%time_info); print_speaker_map (\%time_info) if $opt_m; compute_error_statistics ($chnl_events, $spkr_info->{$file}{$chnl}, \%seg_stats, \%cum_stats); summarize_performance (\%seg_stats) if $opt_f; } } print "\nSummary performance for all data:\n"; print $exp_id; print $inputs; summarize_performance (\%cum_stats); } ################################# sub get_mdtm_data { my ($class, $file, $events, $spkr_info) = @_; my ($record, @fields, %end_time); my ($chnl, $tbeg, $tdur, $tend, $spkr, $actualSpkr, $mode, $conf, $subt); print "$class file = '$file'\n" if $opt_f; open DATA, $file or die "\n$usage\nFATAL ERROR: unable to open MDTM file '$file'\n\n"; while ($record = ) { $exp_id .= $record if $class eq "SYS" and $record =~ /^\s*;;\s*EXP-ID:\s*/i; $inputs .= $record if $class eq "SYS" and $record =~ /^\s*;;\s*INPUTS:\s*/i; chomp $record; next if $record =~ /^\s*([\#;]|$)/; @fields = split /\s+/, $record; die "\n\nFATAL ERROR: insufficient number of fields in MDTM record '$record'\n\n" if @fields < 8; $file = shift @fields; $chnl = shift @fields; $tbeg = shift @fields; $tdur = shift @fields; $mode = shift @fields; $conf = shift @fields; $subt = shift @fields; if ($opt_g) { $actualSpkr = shift @fields; $spkr = $subt; } else { $spkr = shift @fields; } next unless $mode eq "speaker"; $tbeg = $tbeg + 0; $tend = $tbeg + $tdur; die "\n$usage FATAL ERROR: Negative segment start time ('$tbeg') for speaker '$spkr' in record '$record'\n\n" if $tbeg < 0; if ($opt_g) { while (defined $end_time{$file}{$chnl}{$spkr}{$tbeg}) { $tbeg += 0.000001; $tdur -= 0.000001; } } else { die "\n$usage FATAL ERROR: Duplicate segment start time ('$tbeg') for speaker '$spkr' in record '$record'\n\n" if (defined $end_time{$file}{$chnl}{$spkr}{$tbeg}) } $end_time{$file}{$chnl}{$spkr}{$tbeg} = $tend if $tend > $tbeg; $spkr_info->{$file}{$chnl}{$class}{$spkr}{TIME} += $tdur; $spkr_info->{$file}{$chnl}{$class}{$spkr}{TYPE} = $subt unless defined $spkr_info->{$file}{$chnl}{$class}{$spkr}{TYPE}; if ($opt_g) { $spkr_info->{$file}{$chnl}{$class}{$actualSpkr}{ACTTYPE} = $subt unless defined $spkr_info->{$file}{$chnl}{$class}{$actualSpkr}{ACTTYPE}; $spkr_info->{$file}{$chnl}{$class}{"null"}{ACTTYPE} = "undefined"; } die "\n$usage FATAL ERROR: Speaker type conflict for speaker '$spkr', type '$subt'\n in record '$record'\n\n" if $subt ne $spkr_info->{$file}{$chnl}{$class}{$spkr}{TYPE}; } close (DATA); #eliminate segment overlap within speakers foreach $file (keys %end_time) { foreach $chnl (keys %{$end_time{$file}}) { foreach $spkr (keys %{$end_time{$file}{$chnl}}) { $tend = 0; foreach $tbeg (sort {$a<=>$b} keys %{$end_time{$file}{$chnl}{$spkr}}) { if ($tbeg >= $tend) { $events->{$file}{$chnl}{$tbeg}{$class}{$spkr} = "START"; $events->{$file}{$chnl}{$tend}{$class}{$spkr} = "STOP" if $tend > 0 and not defined $events->{$file}{$chnl}{$tend}{$class}{$spkr}; } $tend = max($tend, $end_time{$file}{$chnl}{$spkr}{$tbeg}); } $events->{$file}{$chnl}{$tend}{$class}{$spkr} = "STOP"; } } } } ################################# sub get_uem_data { my ($file, $data) = @_; my ($record, @fields, $seg); return unless defined $file; open DATA, $file or die "\n$usage\nFATAL ERROR: unable to open uem file '$file'\n\n"; print "UEM file = $file\n"; while ($record = ) { chomp $record; next if $record =~ /^\s*([\#;]|$)/; @fields = split /\s+/, $record; shift @fields if $fields[0] eq ""; die "\n\nFATAL ERROR: insufficient number of fields in UEM record '$record'\n\n" unless @fields >= 4; undef $seg; $seg->{FILE} = shift @fields; $seg->{CHNL} = shift @fields; $seg->{TBEG} = shift @fields; $seg->{TEND} = shift @fields; $seg->{FILE} =~ s/.*\///; #strip directory $seg->{FILE} =~ s/\.[^.]*//; #strip file type $seg->{TBEG} =~ s/[^0-9\.]//g; #strip non-numeric (commas) $seg->{TEND} =~ s/[^0-9\.]//g; #strip non-numeric (commas) $seg->{TBEG} += 0; $seg->{TEND} += 0; push @{$data->{$seg->{FILE}}{$seg->{CHNL}}}, $seg; } close DATA; } ################################# sub get_ctm_data { # reference, not sys, ctm data my ($file, $data) = @_; my ($record, @fields, $word, $token_type, %prev_midpts); return unless defined $file; open DATA, $file or die "\n$usage\nFATAL ERROR: unable to open ctm file '$file'\n\n"; print "CTM file = $file\n"; while ($record = ) { chomp $record; next if $record =~ /^\s*([\#;]|$)/; @fields = split /\s+/, $record; shift @fields if $fields[0] eq ""; die "\n\nFATAL ERROR: insufficient number of fields in sys CTM record '$record'\n\n" if @fields < 8; undef $word; $word->{FILE} = shift @fields; $word->{CHNL} = shift @fields; $word->{TBEG} = shift @fields; $word->{TDUR} = shift @fields; $word->{WORD} = shift @fields; shift @fields; $token_type = shift @fields; $word->{SPKR} = shift @fields; if ($opt_n) { next if $token_type eq "noscore"; } $word->{MIDPT} = $word->{TBEG} + $word->{TDUR}/2; while (exists($prev_midpts{$word->{MIDPT}})) { $word->{MIDPT} = $word->{MIDPT} + 0.000001; # 0.000001 is more precision than input data has } $prev_midpts{$word->{MIDPT}} = 1; # now exists push @{$data->{$word->{FILE}}{$word->{CHNL}}}, $word; } close DATA; } ################################# sub add_words { my ($words, $events) = @_; my ($word); foreach $word (@$words) { $events->{$word->{MIDPT}}{WORD} = $word; } } ################################# sub add_collar_no_scoring_zones { my ($eval_segs, $events) = @_; my (@times, $time, $tbeg, $tend, $eval_seg, @eval_segs, %ns_end, $zone, @ns_zones); #set up evaluation segments if (defined $eval_segs) { #eliminate overlapping eval segs undef $tend; foreach $eval_seg (sort {$a->{TBEG}<=>$b->{TBEG}} @$eval_segs) { if (not defined $tend) { $TOTAL_TIME_START = $eval_seg->{TBEG}; } if (not defined $tend or $eval_seg->{TBEG} > $tend) { push @eval_segs, $eval_seg; } else { $eval_segs[@eval_segs-1]->{TEND} = $eval_seg->{TEND}; } $tend = $eval_seg->{TEND}; } } else { #set evaluation segment to first/last times in REF/SYS output @times = sort {$a<=>$b} keys %$events; push @eval_segs, {TBEG => $times[0], TEND => $times[@times-1]}; $TOTAL_TIME_START = $times[0]; } #set up no scoring zones @times = sort {$a<=>$b} keys %$events; foreach $time (@times) { $ns_end{$time-$collar} = $time+$collar if $events->{$time}{REF}; } undef $tend; foreach $tbeg (sort {$a<=>$b} keys %ns_end) { #eliminate overlapping no-score zones if (not defined $tend or $ns_end{$tbeg} > $tend) { push @ns_zones, {TBEG => $tbeg, TEND => $ns_end{$tbeg}}; } else { $ns_zones[@ns_zones-1]->{TEND} = $ns_end{$tbeg}; } $tend = $ns_end{$tbeg}; } #compute evaluation start/stop times foreach $eval_seg (sort {$a->{TBEG}<=>$b->{TBEG}} @eval_segs) { if ($collar == 0) { $events->{$eval_seg->{TBEG}}{SCORE} = "START"; $events->{$eval_seg->{TEND}}{SCORE} = "STOP"; } else { $tend = $eval_seg->{TBEG}; foreach $zone (@ns_zones) { if ($zone->{TBEG} > $tend) { $events->{$tend}{SCORE} = "START"; $events->{$zone->{TBEG}}{SCORE} = "STOP"; } $tend = max($tend, $zone->{TEND}); last if $tend >= $eval_seg->{TEND}; } $events->{$eval_seg->{TEND}}{SCORE} = "STOP"; } } } ################################# sub compute_error_statistics { my ($events, $spkr_info, $seg_stats, $cum_stats) = @_; my ($event, $tbeg, $tend); my ($spkr, $ref_spkr, $sys_spkr, $score, $ref_speakers, $sys_speakers, $seg_dur); my ($nref_mapped, $nref_correct, $nsys_mapped, $nsys_correct, $nmatch); my ($type_stats, $type, $ref_type, %ref_types, $sys_type, %sys_types, %num_types); # exnihilate and initialize seg_stats entries, to avoid undefined keys $seg_stats->{TOTAL_TIME} = 0; $seg_stats->{TOTAL_SPEECH} = 0; $seg_stats->{SCORED_TIME} = 0; $seg_stats->{SCORED_SPEECH} = 0; $seg_stats->{MISSED_SPEECH} = 0; $seg_stats->{FALARM_SPEECH} = 0; $seg_stats->{SCORED_SPEAKER} = 0; $seg_stats->{MISSED_SPEAKER} = 0; $seg_stats->{FALARM_SPEAKER} = 0; $seg_stats->{SPEAKER_ERROR} = 0; # $seg_stats->{SCORED_WORDS} = 0; # leave undefined if were no words $seg_stats->{CORRECT_WORDS} = 0; $seg_stats->{MISSED_WORDS} = 0; $seg_stats->{MISSED_WORDS_IN_OVR} = 0; $seg_stats->{FALARM_WORDS} = 0; $seg_stats->{ERROR_WORDS} = 0; $seg_stats->{ERROR_WORDS_IN_OVR} = 0; $seg_stats->{OVERLAP_WORDS} = 0; # counting just once $seg_stats->{OVERLAP_TIME} = 0; $seg_stats->{MULT_OV_TIME} = 0; #initialize speaker status $seg_stats->{TYPE} = $type_stats = {}; foreach $ref_spkr (keys %{$speaker_data{REF}}) { delete $speaker_data{REF}{$ref_spkr}{SPEAKING}; $sys_spkr = $speaker_data{REF}{$ref_spkr}{MAP}; $speaker_data{SYS}{$sys_spkr}{MAP} = $ref_spkr if defined $sys_spkr; $seg_stats->{MAP}{$ref_spkr} = $sys_spkr; $ref_type = $spkr_info->{REF}{$ref_spkr}{TYPE}; $sys_type = defined $sys_spkr ? $spkr_info->{SYS}{$sys_spkr}{TYPE} : $miss_name; if ($opt_g and not defined $sys_type) { # sys .mdtm said type was "unknown" $sys_type = $miss_name; } $type_stats->{NSPK}{MAP}{REF}{$ref_type}++; $type_stats->{NSPK}{MAP}{JOINT}{$ref_type}{$sys_type}++; } foreach $sys_spkr (keys %{$speaker_data{SYS}}) { $sys_type = $spkr_info->{SYS}{$sys_spkr}{TYPE}; if ($opt_g and not defined $sys_type) { # sys .mdtm said type was "unknown" $sys_type = $miss_name; } $type_stats->{NSPK}{MAP}{SYS}{$sys_type}++; $type_stats->{NSPK}{MAP}{JOINT}{$fa_name}{$sys_type}++ unless defined $speaker_data{SYS}{$sys_spkr}{MAP}; } foreach $sys_spkr (keys %{$speaker_data{SYS}}) { delete $speaker_data{SYS}{$sys_spkr}{SPEAKING}; } # In the following: # A typical event in %$events will be the mid-point of a word, # although there will also be other types of events $tbeg = 0; # Note that the very first event will not be a word midpoint foreach $tend (sort {$a<=>$b} keys %$events) { # typically a word midpt $seg_dur = $tend - $tbeg; # typically word midpt to word midpt $seg_stats->{TOTAL_TIME} = $tend-$TOTAL_TIME_START if defined $events->{$tend}{REF}; $seg_stats->{TOTAL_SPEECH} += $seg_dur if $ref_speakers; #compute diarization scores for this segment {if ($score) { next if (($opt_s or $opt_x) and $ref_speakers >= 2); next if $opt_o and $ref_speakers < 2; $seg_stats->{SCORED_TIME} += $seg_dur; $seg_stats->{SCORED_SPEECH} += $ref_speakers ? $seg_dur : 0; $seg_stats->{MISSED_SPEECH} += ($ref_speakers and not $sys_speakers) ? $seg_dur : 0; $seg_stats->{FALARM_SPEECH} += ($sys_speakers and not $ref_speakers) ? $seg_dur : 0; $seg_stats->{SCORED_SPEAKER} += $ref_speakers*$seg_dur; if ($ref_speakers > 1) { $seg_stats->{OVERLAP_TIME} += $seg_dur; $seg_stats->{MULT_OV_TIME} += $ref_speakers*$seg_dur; if ($events->{$tend}{WORD}) { $seg_stats->{OVERLAP_WORDS} += 1; } } $nref_mapped = $nref_correct = $nsys_mapped = $nsys_correct = 0; foreach $ref_spkr (keys %{$speaker_data{REF}}) { if ($speaker_data{REF}{$ref_spkr}{SPEAKING}) { $sys_spkr = $speaker_data{REF}{$ref_spkr}{MAP}; $nref_correct++ if defined $sys_spkr and $speaker_data{SYS}{$sys_spkr}{SPEAKING}; } } $seg_stats->{MISSED_SPEAKER} += $seg_dur*max($ref_speakers-$sys_speakers, 0); $seg_stats->{FALARM_SPEAKER} += $seg_dur*max($sys_speakers-$ref_speakers, 0); $seg_stats->{SPEAKER_ERROR} += $seg_dur*(min($ref_speakers,$sys_speakers) - $nref_correct); if ($events->{$tend}{WORD}) { # $tend is midpt of a word $seg_stats->{SCORED_WORDS}++ if $ref_speakers > 0; my $foo = ($ref_speakers > 0) ? 1 : 0; if (not $opt_g) { if ( $speaker_data{REF}{$events->{$tend}{WORD}{SPKR}}{SPEAKING} and defined $speaker_data{REF}{$events->{$tend}{WORD}{SPKR}}{MAP} and $speaker_data{SYS}{$speaker_data{REF}{$events->{$tend}{WORD}{SPKR}}{MAP}}{SPEAKING}) { $seg_stats->{CORRECT_WORDS}++; } else { if (min($ref_speakers,$sys_speakers) > $nref_correct) { $seg_stats->{ERROR_WORDS}++; if ($ref_speakers > 1) { $seg_stats->{ERROR_WORDS_IN_OVR}++; } } elsif ($ref_speakers > $sys_speakers) { $seg_stats->{MISSED_WORDS}++; if ($ref_speakers > 1) { $seg_stats->{MISSED_WORDS_IN_OVR}++; } } elsif ($sys_speakers > $ref_speakers) { $seg_stats->{FALARM_WORDS}++; } } } } #compute speaker type statistics undef %num_types; foreach $ref_spkr (keys %{$speaker_data{REF}}) { next unless $speaker_data{REF}{$ref_spkr}{SPEAKING}; $ref_type = $spkr_info->{REF}{$ref_spkr}{TYPE}; $num_types{$ref_type}{REF}++; $sys_spkr = $speaker_data{REF}{$ref_spkr}{MAP}; $sys_type = (defined $sys_spkr and $speaker_data{SYS}{$sys_spkr}{SPEAKING}) ? $spkr_info->{SYS}{$sys_spkr}{TYPE} : $miss_name; if ($opt_g and not defined $sys_type) { # sys .mdtm said type was "unknown" $sys_type = $miss_name; } if (defined $sys_spkr) { $type_stats->{TIME}{MAP}{REF}{$ref_type} += $seg_dur; if ($events->{$tend}{WORD}) { if (defined $opt_g) { if ($spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{ACTTYPE} eq $ref_type) { $type_stats->{WORDS}{MAP}{REF}{$ref_type}++; } } elsif ($events->{$tend}{WORD}{SPKR} eq $ref_spkr && $spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{TYPE} eq $ref_type) { $type_stats->{WORDS}{MAP}{REF}{$ref_type}++; } } $type_stats->{TIME}{MAP}{JOINT}{$ref_type}{$sys_type} += $seg_dur; if ($events->{$tend}{WORD}) { if ($opt_g) { if ($spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{ACTTYPE} eq $ref_type) { $type_stats->{WORDS}{MAP}{JOINT}{$ref_type}{$sys_type}++; } if ($sys_type eq $miss_name) { $events->{$tend}{MISS} = $ref_type; } } elsif ($events->{$tend}{WORD}{SPKR} eq $ref_spkr && $spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{TYPE} eq $ref_type) { $type_stats->{WORDS}{MAP}{JOINT}{$ref_type}{$sys_type}++; } } } } foreach $sys_spkr (keys %{$speaker_data{SYS}}) { next unless $speaker_data{SYS}{$sys_spkr}{SPEAKING}; $sys_type = $spkr_info->{SYS}{$sys_spkr}{TYPE}; $num_types{$sys_type}{SYS}++; # num_types is num of spkrs of same type currently spk'g $ref_spkr = $speaker_data{SYS}{$sys_spkr}{MAP}; $ref_type = (defined $ref_spkr and $speaker_data{REF}{$ref_spkr}{SPEAKING}) ? $spkr_info->{REF}{$ref_spkr}{TYPE} : $fa_name; if (defined $ref_spkr) { $type_stats->{TIME}{MAP}{SYS}{$sys_type} += $seg_dur; if ($events->{$tend}{WORD}) { if ($opt_g) { if ($spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{ACTTYPE} eq $ref_type) { $type_stats->{WORDS}{MAP}{SYS}{$sys_type}++; } } elsif ($events->{$tend}{WORD}{SPKR} eq $ref_spkr && $spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{TYPE} eq $ref_type) { $type_stats->{WORDS}{MAP}{SYS}{$sys_type}++; } } do { $type_stats->{TIME}{MAP}{JOINT}{$ref_type}{$sys_type} += $seg_dur; if ($events->{$tend}{WORD}) { if ($opt_g) { if ($ref_type eq $fa_name) { # shouldn't happen if ($events->{$tend}{MISS}) { $type_stats->{WORDS}{MAP}{JOINT}{$events->{$tend}{MISS}}{$miss_name}--; $type_stats->{TIME}{MAP}{JOINT}{$events->{$tend}{MISS}}{$miss_name} -= $seg_dur; $type_stats->{WORDS}{MAP}{JOINT}{$events->{$tend}{MISS}}{$sys_type}++; $type_stats->{TIME}{MAP}{JOINT}{$events->{$tend}{MISS}}{$sys_type} += $seg_dur; $type_stats->{TIME}{MAP}{JOINT}{$ref_type}{$sys_type} -= $seg_dur; } else { $type_stats->{WORDS}{MAP}{JOINT}{$fa_name}{$sys_type}++; } } } elsif ($events->{$tend}{WORD}{SPKR} eq $ref_spkr) { if ($opt_g) { if ($spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{ACTTYPE} eq $ref_type) { $type_stats->{WORDS}{MAP}{JOINT}{$ref_type}{$sys_type}++; } } elsif ($spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{TYPE} eq $ref_type) { $type_stats->{WORDS}{MAP}{JOINT}{$ref_type}{$sys_type}++ ; } } } } unless $speaker_data{REF}{$ref_spkr}{SPEAKING}; } } foreach $type (keys %num_types) { $num_types{$type}{REF} = 0 unless defined $num_types{$type}{REF}; $num_types{$type}{SYS} = 0 unless defined $num_types{$type}{SYS}; $num_types{$type}{ORIG_SYS} = $num_types{$type}{SYS}; # memory $type_stats->{TIME}{ALL}{REF}{$type} += $seg_dur*$num_types{$type}{REF}; if ($events->{$tend}{WORD}) { if ($opt_g) { if ($spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{ACTTYPE} eq $type) { $type_stats->{WORDS}{ALL}{REF}{$type}++; } } elsif ( $speaker_data{REF}{$events->{$tend}{WORD}{SPKR}}{SPEAKING} and $spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{TYPE} eq $type) { $type_stats->{WORDS}{ALL}{REF}{$type}++; } } $type_stats->{TIME}{ALL}{SYS}{$type} += $seg_dur*$num_types{$type}{SYS}; $nmatch = min($num_types{$type}{REF}, $num_types{$type}{SYS}); next if $nmatch == 0; $type_stats->{TIME}{ALL}{JOINT}{$type}{$type} += $seg_dur*$nmatch; if ($events->{$tend}{WORD}) { if ($opt_g) { if ($spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{ACTTYPE} eq $type) { $type_stats->{WORDS}{ALL}{JOINT}{$type}{$type}++; } } elsif ( $speaker_data{REF}{$events->{$tend}{WORD}{SPKR}}{SPEAKING} and $spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{TYPE} eq $type) { $type_stats->{WORDS}{ALL}{JOINT}{$type}{$type}++; } } $num_types{$type}{REF} -= $nmatch; $num_types{$type}{SYS} -= $nmatch; } foreach $ref_type (sort keys %num_types) { next if $num_types{$ref_type}{REF} == 0; foreach $sys_type (keys %num_types) { next if $num_types{$sys_type}{SYS} == 0; $nmatch = min($num_types{$ref_type}{REF}, $num_types{$sys_type}{SYS}); next if $nmatch == 0; $type_stats->{TIME}{ALL}{JOINT}{$ref_type}{$sys_type} += $seg_dur*$nmatch; if ($events->{$tend}{WORD}) { if ($opt_g) { if ($spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{ACTTYPE} eq $ref_type) { $type_stats->{WORDS}{ALL}{JOINT}{$ref_type}{$sys_type}++; } } elsif ( $speaker_data{REF}{$events->{$tend}{WORD}{SPKR}}{SPEAKING} and $spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{TYPE} eq $ref_type) { $type_stats->{WORDS}{ALL}{JOINT}{$ref_type}{$sys_type}++; } } $num_types{$ref_type}{REF} -= $nmatch; $num_types{$sys_type}{SYS} -= $nmatch; } } foreach $type (keys %num_types) { do { $type_stats->{TIME}{ALL}{JOINT}{$type}{$miss_name} += $seg_dur*$num_types{$type}{REF}; if ( $events->{$tend}{WORD}) { if ($opt_g) { if ( ($spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{ACTTYPE} eq $type) && ($num_types{$type}{ORIG_SYS} == 0) ) { $type_stats->{WORDS}{ALL}{JOINT}{$type}{$miss_name}++; } } elsif ( $speaker_data{REF}{$events->{$tend}{WORD}{SPKR}}{SPEAKING} and $spkr_info->{REF}{$events->{$tend}{WORD}{SPKR}}{TYPE} eq $type ) { $type_stats->{WORDS}{ALL}{JOINT}{$type}{$miss_name}++ unless $num_types{$type}{ORIG_SYS} > 0; } } } if $num_types{$type}{REF} > 0; do { $type_stats->{TIME}{ALL}{JOINT}{$fa_name}{$type} += $seg_dur*$num_types{$type}{SYS}; if ( $events->{$tend}{WORD} ) { $type_stats->{WORDS}{ALL}{JOINT}{$fa_name}{$type}++; } }if $num_types{$type}{SYS} > 0; } }} #compute the characteristics of the next segment $event = $events->{$tbeg=$tend}; $score = $event->{SCORE} eq "START" if defined $event->{SCORE}; if (defined $event->{REF}) { foreach $spkr (keys %{$event->{REF}}) { $speaker_data{REF}{$spkr}{SPEAKING} = $event->{REF}{$spkr} eq "START"; } } if (defined $event->{SYS}) { foreach $spkr (keys %{$event->{SYS}}) { $speaker_data{SYS}{$spkr}{SPEAKING} = $event->{SYS}{$spkr} eq "START"; } } $ref_speakers = speakers_speaking ($speaker_data{REF}); $sys_speakers = speakers_speaking ($speaker_data{SYS}); } #accumulate statistics foreach $type (keys %$seg_stats) { $cum_stats->{$type} += $seg_stats->{$type} if $type ne "TYPE"; } foreach my $class ("TIME", "NSPK", "WORDS") { foreach my $mode ("ALL", "MAP") { foreach my $kind ("REF", "SYS") { foreach $type (keys %{$type_stats->{$class}{$mode}{$kind}}) { $cum_stats->{TYPE}{$class}{$mode}{$kind}{$type} += $type_stats->{$class}{$mode}{$kind}{$type}; } } foreach $type (keys %{$type_stats->{$class}{$mode}{JOINT}}) { foreach $sys_type (keys %{$type_stats->{$class}{$mode}{JOINT}{$type}}) { $cum_stats->{TYPE}{$class}{$mode}{JOINT}{$type}{$sys_type} += $type_stats->{$class}{$mode}{JOINT}{$type}{$sys_type}; } } } } } ################################# sub speakers_speaking { my ($spkr_data) = @_; my $speakers = 0; foreach my $spkr (keys %{$spkr_data}) { $speakers++ if $spkr_data->{$spkr}{SPEAKING}; } return $speakers; } ################################# sub summarize_performance { my ($stats) = @_; my ($task_name); print "\n-------------------------------------------------------------------------\n\n"; printf "Summary by time:\n"; print "------------------\n\n"; printf " TOTAL TIME =%8.2f secs\n", $stats->{TOTAL_TIME}; printf " TOTAL SPEECH =%8.2f secs (%5.1f percent of total time)\n", $stats->{TOTAL_SPEECH}, 100*$stats->{TOTAL_SPEECH}/$stats->{TOTAL_TIME}; printf " SCORED TIME =%8.2f secs (%5.1f percent of total time)\n", $stats->{SCORED_TIME}, 100*$stats->{SCORED_TIME}/$stats->{TOTAL_TIME}; printf "SCORED SPEECH =%8.2f secs ", $stats->{SCORED_SPEECH}; if ($stats->{SCORED_TIME} > 0) { printf "(%5.1f percent of scored time)\n", 100*$stats->{SCORED_SPEECH}/$stats->{SCORED_TIME}; } else { print "\n";} if (not ($opt_x or $opt_s) and not $opt_o) { print "---------------------------------------------\n"; printf " TIME IN OVERLAPS =%8.2f secs (%5.1f percent of scored time)\n", $stats->{OVERLAP_TIME}, 100*$stats->{OVERLAP_TIME}/$stats->{SCORED_TIME}; printf "TIME*SPEAKERS IN OVERLAPS =%8.2f secs (%5.1f percent of scored time)\n", $stats->{MULT_OV_TIME}, 100*$stats->{MULT_OV_TIME}/$stats->{SCORED_TIME}; } print "---------------------------------------------\n"; printf "MISSED SPEECH =%8.2f secs ", $stats->{MISSED_SPEECH}; if ($stats->{SCORED_TIME} > 0) { printf "(%5.1f percent of scored time)\n", 100*$stats->{MISSED_SPEECH}/$stats->{SCORED_TIME}; } else { print "\n";} printf "FALARM SPEECH =%8.2f secs", $stats->{FALARM_SPEECH}; if ($stats->{SCORED_TIME} > 0) { printf "(%5.1f percent of scored time)\n", 100*$stats->{FALARM_SPEECH}/$stats->{SCORED_TIME}; } else { print "\n";} print "-------------------------------------------------------------------------\n"; printf "SCORED SPEAKER TIME =%8.2f secs ", $stats->{SCORED_SPEAKER}; if ($stats->{SCORED_SPEECH} > 0) { printf "(%5.1f percent of scored speech)\n", 100*$stats->{SCORED_SPEAKER}/$stats->{SCORED_SPEECH}; } else { print "\n";} printf "MISSED SPEAKER TIME =%8.2f secs ", $stats->{MISSED_SPEAKER}; if ($stats->{SCORED_SPEAKER} > 0) { printf "(%5.1f percent of scored speaker time)\n", 100*$stats->{MISSED_SPEAKER}/$stats->{SCORED_SPEAKER}; } else { print "\n";} printf "FALARM SPEAKER TIME =%8.2f secs ", $stats->{FALARM_SPEAKER}; if ($stats->{SCORED_SPEAKER} > 0) { printf "(%5.1f percent of scored speaker time)\n", 100*$stats->{FALARM_SPEAKER}/$stats->{SCORED_SPEAKER}; } else { print "\n";} printf " SPEAKER ERROR TIME =%8.2f secs ", $stats->{SPEAKER_ERROR}; if ($stats->{SCORED_SPEAKER} > 0) { printf "(%5.1f percent of scored speaker time)\n", 100*$stats->{SPEAKER_ERROR}/$stats->{SCORED_SPEAKER}; } else { print "\n";} print "---------------------------------------------\n"; if ($opt_g) { $task_name = "SPEAKER TYPE DIARIZATION"; } else { $task_name = "SPEAKER DIARIZATION"; } if ($stats->{SCORED_SPEAKER} > 0) { printf " OVERALL $task_name ERROR = %.2f percent of scored speaker time\n", 100*($stats->{MISSED_SPEAKER} + $stats->{FALARM_SPEAKER} + $stats->{SPEAKER_ERROR})/ $stats->{SCORED_SPEAKER}; } else { printf " OVERALL $task_name ERROR TIME = %8.2f secs\n", 100 * ($stats->{MISSED_SPEAKER} + $stats->{FALARM_SPEAKER} + $stats->{SPEAKER_ERROR} ); } print "=========================================================================\n\n"; if (defined $opt_w) { printf "Summary by words:\n"; print "-------------------\n\n"; printf " WORDS IN OVERLAPS =%6d ", $stats->{OVERLAP_WORDS}; if ($stats->{SCORED_WORDS}) { printf "(%5.1f percent of scored speaker words)", 100*$stats->{OVERLAP_WORDS}/$stats->{SCORED_WORDS}; } print "\n"; if (not($opt_s or $opt_x) and not $opt_o) { printf " MISSED WORDS IN OVERLAPS =%6d\n", $stats->{MISSED_WORDS_IN_OVR}; printf " ERROR WORDS IN OVERLAPS =%6d\n", $stats->{ERROR_WORDS_IN_OVR}; } print "-------------------------------------------------------------------------\n"; if (defined $stats->{SCORED_WORDS}) { if ($opt_g) { my ($correct_total, $error_total, $miss_total, $fa_total); ($correct_total, $miss_total, $fa_total, $error_total) = calculate_word_based_performance($stats->{TYPE}, "WORDS", "ALL"); printf "SCORED SPEAKER WORDS =%6d\n", $stats->{SCORED_WORDS}; printf " CORRECT WORDS =%6d\n", $correct_total; printf "MISSED SPEAKER WORDS =%6d (%5.1f percent of scored speaker words)\n", $miss_total, 100*$miss_total/$stats->{SCORED_WORDS}; printf "FALARM SPEAKER WORDS =%6d (%5.1f percent of scored speaker words)\n", $fa_total, 100*$fa_total/$stats->{SCORED_WORDS}; printf " SPEAKER ERROR WORDS =%6d (%5.1f percent of scored speaker words)\n", $error_total, 100*$error_total/$stats->{SCORED_WORDS}; print "---------------------------------------------\n"; printf " OVERALL SPEAKER TYPE DIARIZATION ERROR = %.2f percent of scored speaker words\n", 100*($miss_total + $fa_total + $error_total) / $stats->{SCORED_WORDS}; } else { # not $opt_g printf " SCORED SPEAKER WORDS =%6d\n", $stats->{SCORED_WORDS}; printf "CORRECT SPEAKER WORDS =%6d (%5.1f percent of scored speaker words)\n", $stats->{CORRECT_WORDS}, 100*$stats->{CORRECT_WORDS}/$stats->{SCORED_WORDS}; printf " MISSED SPEAKER WORDS =%6d (%5.1f percent of scored speaker words)\n", $stats->{MISSED_WORDS}, 100*$stats->{MISSED_WORDS}/$stats->{SCORED_WORDS}; printf " FALARM SPEAKER WORDS =%6d (%5.1f percent of scored speaker words)\n", $stats->{FALARM_WORDS}, 100*$stats->{FALARM_WORDS}/$stats->{SCORED_WORDS}; printf " SPEAKER ERROR WORDS =%6d (%5.1f percent of scored speaker words)\n", $stats->{ERROR_WORDS}, 100*$stats->{ERROR_WORDS}/$stats->{SCORED_WORDS}; print "---------------------------------------------\n"; printf " OVERALL SPEAKER DIARIZATION ERROR = %.2f percent of scored speaker words\n", 100*($stats->{MISSED_WORDS} + $stats->{FALARM_WORDS} + $stats->{ERROR_WORDS}) / $stats->{SCORED_WORDS}; } } else { printf "SCORED SPEAKER WORDS = 0\n"; printf "MISSED SPEAKER WORDS = 0 (*undefined* percent of scored speaker words\n"; printf "FALARM SPEAKER WORDS = 0 (*undefined* percent of scored speaker words)\n"; printf " SPEAKER ERROR WORDS = 0 (*undefined* percent of scored speaker words)\n"; print "---------------------------------------------\n"; printf " OVERALL SPEAKER DIARIZATION ERROR = *undefined*\n"; print "\t(no scored speaker words, so none were diarized incorrectly)\n"; } print "=========================================================================\n\n"; } print "Summary by speaker type:\n"; print "--------------------------\n\n"; if (not $opt_g) { printf " Speaker type confusion matrix -- speaker counts\n"; summarize_speaker_type_performance ($stats->{TYPE}, "NSPK", "MAP"); print "---------------------------------------------\n"; } if (not $opt_g) { print " Speaker type confusion matrix, by time -- for mapped speakers only\n"; summarize_speaker_type_performance ($stats->{TYPE}, "TIME", "MAP"); print "---------------------------------------------\n"; } if (defined $opt_w and not $opt_g) { print " Speaker type confusion matrix, by words -- for mapped speakers only\n"; summarize_speaker_type_performance ($stats->{TYPE}, "WORDS", "MAP"); } print "---------------------------------------------\n"; print " Speaker type confusion matrix, by time"; if (not $opt_g) { print " -- for all speech"; } print "\n"; summarize_speaker_type_performance ($stats->{TYPE}, "TIME", "ALL"); if (defined $opt_w) { print "---------------------------------------------\n"; print " Speaker type confusion matrix, by words"; if (not $opt_g) { print " -- for all speech"; } print "\n"; summarize_speaker_type_performance ($stats->{TYPE}, "WORDS", "ALL"); } print "\n-------------------------------------------------------------------------\n\n"; } ################################# sub summarize_speaker_type_performance { my ($stats, $class, $mode) = @_; my ($ref_type, $sys_type, $sys_stat, %sys_types); foreach $ref_type (keys %{$stats->{$class}{$mode}{REF}}, $fa_name) { foreach $sys_type (keys %{$stats->{$class}{$mode}{JOINT}{$ref_type}}) { $sys_types{$sys_type} = 1 unless $sys_type eq $miss_name; } } print " REF\\SYS (count) " if $class eq "NSPK"; print " REF\\SYS (seconds) " if $class eq "TIME"; print " REF\\SYS (count) " if $class eq "WORDS"; foreach $sys_type ((sort keys %sys_types), $miss_name) { printf "%-20s", $sys_type; } print "\n"; my $ref_tot = 0; foreach $ref_type (keys %{$stats->{$class}{$mode}{REF}}) { $ref_tot += $stats->{$class}{$mode}{REF}{$ref_type}; } foreach $ref_type ((sort keys %{$stats->{$class}{$mode}{REF}}), $fa_name) { printf "%-16s", $ref_type; foreach $sys_type ((sort keys %sys_types), $miss_name) { next if $ref_type eq $fa_name and $sys_type eq $miss_name; $sys_stat = $stats->{$class}{$mode}{JOINT}{$ref_type}{$sys_type}; $sys_stat = 0 unless defined $sys_stat; if ($ref_tot == 0) { $ref_tot = 1; } printf "%11d /%6.1f", $sys_stat, 100*$sys_stat/$ref_tot, if $class eq "NSPK"; printf "%11d /%6.1f", $sys_stat, 100*$sys_stat/$ref_tot, if $class eq "WORDS"; printf "%11.2f /%6.1f", $sys_stat, 100*$sys_stat/$ref_tot, if $class eq "TIME"; print "%"; } print "\n"; } } ################################# sub calculate_word_based_performance { my ($stats, $class, $mode) = @_; my ($ref_type, $sys_type, $sys_stat, %sys_types); my ($correct_total, $error_total, $miss_total, $fa_total); $correct_total = $error_total = $miss_total = $fa_total = 0; foreach $ref_type (keys %{$stats->{$class}{$mode}{REF}}, $fa_name) { foreach $sys_type (keys %{$stats->{$class}{$mode}{JOINT}{$ref_type}}) { $sys_types{$sys_type} = 1 unless $sys_type eq $miss_name; } } my $ref_tot = 0; foreach $ref_type (keys %{$stats->{$class}{$mode}{REF}}) { $ref_tot += $stats->{$class}{$mode}{REF}{$ref_type}; } foreach $ref_type ((sort keys %{$stats->{$class}{$mode}{REF}}), $fa_name) { foreach $sys_type ((sort keys %sys_types), $miss_name) { next if $ref_type eq $fa_name and $sys_type eq $miss_name; $sys_stat = $stats->{$class}{$mode}{JOINT}{$ref_type}{$sys_type}; $sys_stat = 0 unless defined $sys_stat; if ($ref_type eq $sys_type) { $correct_total += $sys_stat; } elsif ($sys_type eq $miss_name) { $miss_total += $sys_stat; } elsif ($ref_type eq $fa_name) { $fa_total += $sys_stat; } else { $error_total += $sys_stat; } } } return ($correct_total, $miss_total, $fa_total, $error_total); } ################################# sub map_speakers { my ($events, $time_info) = @_; my ($event, $tbeg, $tend); my ($spkr, $ref_spkr, $sys_spkr, $score); #compute the cumulative match time between ref and sys speakers $tbeg = 0; foreach $tend (sort {$a<=>$b} keys %$events) { if ($score) { foreach $ref_spkr (keys %{$speaker_data{REF}}) { next unless $speaker_data{REF}{$ref_spkr}{SPEAKING}; foreach $sys_spkr (keys %{$speaker_data{SYS}}) { next unless $speaker_data{SYS}{$sys_spkr}{SPEAKING}; $time_info->{$ref_spkr}{$sys_spkr} += $tend-$tbeg; } } } $event = $events->{$tend}; $score = $event->{SCORE} eq "START" if defined $event->{SCORE}; if (defined $event->{REF}) { foreach $spkr (keys %{$event->{REF}}) { $speaker_data{REF}{$spkr}{SPEAKING} = $event->{REF}{$spkr} eq "START"; } } if (defined $event->{SYS}) { foreach $spkr (keys %{$event->{SYS}}) { $speaker_data{SYS}{$spkr}{SPEAKING} = $event->{SYS}{$spkr} eq "START"; } } $tbeg = $tend; } if ($opt_g) { # Map each speaker type (sex) to itself foreach $spkr (keys %{$speaker_data{REF}}) { $speaker_data{REF}{$spkr}{MAP} = $spkr; $speaker_data{SYS}{$spkr}{MAP} = $spkr; } } else { # Find the mapping that maximizes the cumulative match time # between ref and sys spkrs best_match ($time_info); } } ################################# sub best_match { my ($time_info) = @_; my ($ref_spkr, $sys_spkr, @ref_spkrs, @sys_spkrs); my (@map_candidates, $score, $score_bound); #create sys speaker pointer array foreach $sys_spkr (keys %{$speaker_data{SYS}}) { $speaker_data{SYS}{$sys_spkr}{ID} = $sys_spkr; push @sys_spkrs, $speaker_data{SYS}{$sys_spkr}; } #create ref speaker pointer array foreach $ref_spkr (keys %{$speaker_data{REF}}) { $speaker_data{REF}{$ref_spkr}{ID} = $ref_spkr; push @ref_spkrs, $speaker_data{REF}{$ref_spkr}; } #set up map candidates for all ref speakers foreach $ref_spkr (@ref_spkrs) { #order sys speakers so that those with the best potential match come first undef @map_candidates; push @map_candidates, {speaker => undef, score => 0}; foreach $sys_spkr (@sys_spkrs) { $score = $time_info->{$ref_spkr->{ID}}{$sys_spkr->{ID}}; push @map_candidates, {speaker => $sys_spkr, score => $score} if defined $score and $score > 0; } @map_candidates = sort {$b->{score} <=> $a->{score}} @map_candidates; $ref_spkr->{candidates} = [@map_candidates]; $ref_spkr->{max_score} = $ref_spkr->{candidates}[0]->{score}; } #order ref speakers so that those with the largest potential score come first $score_bound = 0; @ref_spkrs = sort {$b->{max_score} <=> $a->{max_score}} @ref_spkrs; foreach $ref_spkr (reverse @ref_spkrs) { $score_bound += $ref_spkr->{max_score}; $ref_spkr->{score_bound} = $score_bound; } #perform exhaustive search for optimum mapping find_optimum_mapping ($best_score=0, \@ref_spkrs, \@sys_spkrs, \@ref_spkrs); #get mapped sys speaker ID's foreach $ref_spkr (@ref_spkrs) { $ref_spkr->{MAP} = $ref_spkr->{MAP}{ID} if defined $ref_spkr->{MAP}; } } ################################# sub find_optimum_mapping { my ($prior_score, $ref_remaining, $sys_spkrs, $ref_spkrs) = @_; my ($ref_spkr, $sys_spkr, $candidate, @candidates); my (@ref_remaining) = @$ref_remaining; #map the next ref speaker if ($ref_spkr = shift (@ref_remaining)) { return if ($prior_score+$ref_spkr->{score_bound} <= $best_score); #now try all mappings @candidates = @{$ref_spkr->{candidates}}; while (my $candidate = shift @candidates) { #each sys speaker may map to only one ref speaker $sys_spkr = $candidate->{speaker}; unless ($sys_spkr->{MAP}) { #try this mapping $sys_spkr->{MAP} = $ref_spkr if defined $sys_spkr->{ID}; find_optimum_mapping ($candidate->{score}+$prior_score, \@ref_remaining, $sys_spkrs, $ref_spkrs); delete $sys_spkr->{MAP}; } } } #all reference speakers have been mapped else { if ($prior_score > $best_score) { #keep better mapping $best_score = $prior_score; foreach my $spkr (@$ref_spkrs) { #delete old mapping delete $spkr->{MAP}; } foreach my $spkr (@$sys_spkrs) { #create new mapping $spkr->{MAP}->{MAP} = $spkr if ($spkr->{MAP}); } } } } ################################# sub print_speaker_map { my ($time_info) = @_; my ($ref_spkr, $sys_spkr); foreach $ref_spkr (sort keys %{$speaker_data{REF}}) { $sys_spkr = $speaker_data{REF}{$ref_spkr}{MAP}; print "'$ref_spkr' => ", defined $sys_spkr ? "'$sys_spkr'\n" : "\n"; foreach $sys_spkr (sort keys %{$speaker_data{SYS}}) { my $time = $time_info->{$ref_spkr}{$sys_spkr}; printf "%9.2f secs matched to '$sys_spkr'\n", defined $time ? $time : 0; } } } ################################# sub print_events { my ($events) = @_; foreach my $time (sort {$a<=>$b} keys %$events) { print "$time, SCORE, $events->{$time}{SCORE}\n" if defined $events->{$time}{SCORE}; if (defined $events->{$time}{REF}) { print "$time, REF"; foreach my $spkr (sort keys %{$events->{$time}{REF}}) { print ", ($spkr $events->{$time}{REF}{$spkr})"; } print "\n"; } if (defined $events->{$time}{SYS}) { print "$time, SYS"; foreach my $spkr (sort keys %{$events->{$time}{SYS}}) { print ", ($spkr $events->{$time}{SYS}{$spkr})"; } print "\n"; } } } ################################# sub date_time_stamp { my ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime(); my @months = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); my ($date, $time); $time = sprintf "%2.2d:%2.2d:%2.2d", $hour, $min, $sec; $date = sprintf "%4.4s %3.3s %s", 1900+$year, $months[$mon], $mday; return ($date, $time); } ################################# sub max { my ($max, $x); $max = shift @_; foreach $x (@_) { if ($x > $max) {$max = $x} } return $max; } ################################# sub min { my ($min, $x); $min = shift @_; foreach $x (@_) { if ($x < $min) {$min = $x} } return $min; } #################################