#!/usr/bin/perl -w use strict; ################################# # History: # # version 15: * word alignment tuned to include consideration of metadata # attributes in addition to word spelling and type/subtype. # # version 14: * use of UEM data changed - rather than discard rttm data # in the UEM exclusion zones, the data are processed normally # and SU end boundaries in the exclusion zones are not counted # during scoring. # * count SU boundary errors explicitly rather than inferring # them from SU statistics. # # version 13: * accommodate multiple hyphens when splitting hyphenated words # * penalize SU mapping score for differences in SU endpoints # # version 12: * debug discard_unannotated_SUs # # version 11: * score frags and noscore NON-LEX and NON-SPEECH # * modify word alignment to # - allow frag to match frag ignoring orthography # - allow fp to match fp ignoring orthography # # version 10: * fix bug in md_score # * fix bug in discard_unannotated_SUs # # version 9: * fix bug in translating sys time to ref time # * upgrade word representation from CTM to RTTM # # version 8: * bug fix, plus rule change for discarding unannotated SU's: # - discard sys output SU's that map to unannotated ref SU's # or that are unmapped but overlap an unannotated ref SU. # - discard unannotated ref SU's. # # version 7: * code around an error in the Unix implementation of Perl # # version 6: * fix bug in handling of unannotated SU's # * reinstitute scoring of SU type (since the basic # eval plan which ignores SU type is computed separately) # # version 5: added Error(SU) as defined by eval plan v6. 
# # version 4: modified to discard all ref SU's of type 'unannotated' # and all sys SU's that are mapped to them and # to discard all unmapped sys SU's that overlap # the discarded ref SU's of type 'unannotated' # fixed to score only SU boundary and not on SU boundary and SU type # fixed 'uninitialized variable' error in reading in rttm file # # version 3: converted to run from RTTM input files # # version 2: minor bug fixed in interpretation of SU extent limit # # version 1: adapted from md-eval-v05 # ################################# ###### # Intro my ($date, $time) = date_time_stamp(); print "su-eval run on $date at $time\n"; print "command line: ", $0, " ", join(" ", @ARGV), "\n"; my $usage = "\n\nUsage: $0 [-h] -r -s \n\n". "Description: su-eval computes EARS SU performance statistics\n". " by comparing system output data with reference data\n". "INPUT:\n". " -r A file containing reference data, in RTTM format.\n". " This data must include word level transcripts in addition to\n". " the SU information being evaluated.\n". " -s A file containing system output data, in RTTM format.\n". " If the word-mediated alignment option is used then this data\n". " must include word level transcripts in addition to the SU output\n". " data being evaluated.\n". " input options:\n". " -w for word-mediated alignment.\n". " * The default (time-mediated) alignment aligns ref and sys metadata\n". " according to the time overlap of the original ref and sys metadata\n". " time intervals.\n". " * Word-mediated alignment aligns ref and sys metadata according to\n". " the alignment of the words that are subsumed within the metadata\n". " time intervals.\n". " -W for word-optimized mapping.\n". " * The default (time-optimized) mapping maps ref and sys metadata\n". " so as to maximize the time overlap of mapped metadata events.\n". " * Word-optimized mapping maps ref and sys metadata so as to\n". " maximize the overlap in terms of the number of reference words\n". 
" that are subsumed within the overlapping time interval.\n". " -t