#!/usr/bin/perl -w

use strict;

#################################
# History:
#
# version 13: * word alignment tuned to include consideration of metadata
#               attributes in addition to word spelling and type/subtype.
#             * use of UEM data changed - rather than discard rttm data
#               in the UEM exclusion zones, the data are processed normally
#               and words in the exclusion zones are not counted during
#               DEPOD scoring.
#             * accommodate multiple hyphens when splitting hyphenated words
#
# version 12: * score frags, but noscore NON-LEX and NON-SPEECH
#             * modify word alignment to:
#               - make fp match fp, ignoring orthography
#               - make frag match frag, ignoring orthography
#
# version 11: * coded around an error in the Unix implementation of Perl
#
# version 10: * count all LEXEMEs in denominator of overall disfluency error
#               score (to conform with version 6 of the eval plan)
#             * fix bug in counting metadata error words
#             * add IP (exact) boundary detection statistics
#
# version 9:  * converted to run from RTTM input files
#
# version 8:  * cosmetic enhancement to print confusion statistics for all
#               used subtypes (rather than all subtypes with at least 1 error)
#
# version 7:  * disfluency subtypes updated by adding "simple" as an edit subtype
#               and "edit&filler" as an IP subtype
#             * conditional evaluation of complex edits added
#             * subtype confusion statistics added for IP's
#
# version 6:  * pads redefined as gaps
#             * a few bug fixes
#             * cosmetic improvements
#
# version 5:  * bug fix (in *_word_indices calculation)
#
# version 4:  * increase W_pad to allow non-adjacent sys and ref dfs to map when
#               using word-based overlap computations.
#             * modify word_score to induce sys and ref non-lex to map to each
#               other.
#             * add un-lex to word types that count in disfluency scoring.
#
# version 3:  * word_score modified to give yet better alignment of sys and ref
#               (by half-penalizing fps and frags that have spelling mismatches).
#             * limit word counts in disfluency scoring to lex, fp, and for-lex.
# # version 2: * word_score modified to give better alignment of sys and ref # (by reassessing fault for spurious system output of fps and frags). # * Miss/FA added to confusion matrices. # * better labelling of evaluation output. # # version 1: adapted from stt-eval # ################################# ###### # Intro my ($date, $time) = date_time_stamp(); print "df-eval run on $date at $time\n"; print "command line: ", $0, " ", join(" ", @ARGV), "\n"; my $usage = "\n\nUsage: $0 [-h] -r -s \n\n". "Description: df-eval computes EARS disfluency statistics\n". " by comparing system output data with reference data\n". "INPUT:\n". " -r A file containing reference data, in RTTM format.\n". " This data must include word level transcripts in addition to\n". " the disfluency information being evaluated.\n". " -s A file containing system output data, in RTTM format.\n". " If the word-mediated alignment option is used then this data\n". " must include word level transcripts in addition to the disfluency\n". " output data being evaluated.\n". " input options:\n". " -c to include complex edits in the analysis and scoring.\n". " -w for word-mediated alignment.\n". " * The default (time-mediated) alignment aligns ref and sys metadata\n". " according to the time overlap of the original ref and sys metadata\n". " time intervals.\n". " * Word-mediated alignment aligns ref and sys metadata according to\n". " the alignment of the words that are subsumed within the metadata\n". " time intervals.\n". " -W for word-optimized mapping.\n". " * The default (time-optimized) mapping maps ref and sys metadata\n". " so as to maximize the time overlap of mapped metadata events.\n". " * Word-optimized mapping maps ref and sys metadata so as to\n". " maximize the overlap in terms of the number of reference words\n". " that are subsumed within the overlapping time interval.\n". " -t