Single Reference Track
| 1 | SEPIA2 | 0.8048 | (0.7895, 0.8191) | 0.6024 | (0.5749, 0.6285) | 0.7782 | (0.7610, 0.7942) | graph_category graph_category_2 graph_scatterplot |
| 2 | CDer | -0.8353 | (-0.8475, -0.8221) | -0.6385 | (-0.6628, -0.6130) | -0.8330 | (-0.8455, -0.8197) | graph_category graph_category_2 graph_scatterplot |
| 3 | ULCh | 0.4571 | (0.4233, 0.4897) | 0.3098 | (0.2714, 0.3473) | 0.4612 | (0.4275, 0.4936) | graph_category graph_category_2 graph_scatterplot |
| 4 | TER-v0.7.25 | -0.7936 | (-0.8087, -0.7776) | -0.5966 | (-0.6230, -0.5688) | -0.7810 | (-0.7969, -0.7641) | graph_category graph_category_2 graph_scatterplot |
| 5 | DP-Orp | 0.3344 | (0.2965, 0.3712) | 0.2247 | (0.1844, 0.2642) | 0.3416 | (0.3039, 0.3781) | graph_category graph_category_2 graph_scatterplot |
| 6 | NIST-v11b | 0.8143 | (0.7997, 0.8280) | 0.6137 | (0.5868, 0.6392) | 0.8096 | (0.7946, 0.8236) | graph_category graph_category_2 graph_scatterplot |
| 7 | ATEC4 | 0.6638 | (0.6396, 0.6866) | 0.4811 | (0.4481, 0.5127) | 0.6453 | (0.6201, 0.6692) | graph_category graph_category_2 graph_scatterplot |
| 8 | ATEC1 | 0.6672 | (0.6432, 0.6899) | 0.4847 | (0.4519, 0.5161) | 0.6445 | (0.6193, 0.6684) | graph_category graph_category_2 graph_scatterplot |
| 9 | mBLEU | 0.5225 | (0.4913, 0.5524) | 0.3951 | (0.3591, 0.4300) | 0.5074 | (0.4756, 0.5380) | graph_category graph_category_2 graph_scatterplot |
| 10 | SNR | 0.4979 | (0.4657, 0.5289) | 0.3445 | (0.3070, 0.3810) | 0.4317 | (0.3969, 0.4652) | graph_category graph_category_2 graph_scatterplot |
| 11 | 4-GRR | 0.7780 | (0.7608, 0.7940) | 0.5782 | (0.5496, 0.6055) | 0.7464 | (0.7272, 0.7644) | graph_category graph_category_2 graph_scatterplot |
| 12 | ATEC2 | 0.6663 | (0.6422, 0.6890) | 0.4837 | (0.4508, 0.5152) | 0.6436 | (0.6183, 0.6675) | graph_category graph_category_2 graph_scatterplot |
| 13 | SEPIA1 | 0.8108 | (0.7959, 0.8247) | 0.6091 | (0.5820, 0.6349) | 0.7839 | (0.7671, 0.7996) | graph_category graph_category_2 graph_scatterplot |
| 14 | ULCopt | 0.4818 | (0.4489, 0.5134) | 0.3309 | (0.2930, 0.3678) | 0.4340 | (0.3993, 0.4675) | graph_category graph_category_2 graph_scatterplot |
| 15 | mTER | -0.4983 | (-0.5292, -0.4661) | -0.3759 | (-0.4114, -0.3393) | -0.3729 | (-0.4085, -0.3362) | graph_category graph_category_2 graph_scatterplot |
| 16 | EDPM | 0.8134 | (0.7986, 0.8271) | 0.6134 | (0.5866, 0.6390) | 0.8009 | (0.7853, 0.8155) | graph_category graph_category_2 graph_scatterplot |
| 17 | BLEU-4 | 0.7707 | (0.7531, 0.7872) | 0.5691 | (0.5400, 0.5968) | 0.7449 | (0.7256, 0.7630) | graph_category graph_category_2 graph_scatterplot |
| 18 | METEOR-v0.6 | 0.8108 | (0.7959, 0.8247) | 0.6106 | (0.5836, 0.6363) | 0.8015 | (0.7859, 0.8160) | graph_category graph_category_2 graph_scatterplot |
| 19 | RTE-MT | 0.6914 | (0.6689, 0.7127) | 0.4991 | (0.4669, 0.5300) | 0.6628 | (0.6386, 0.6857) | graph_category graph_category_2 graph_scatterplot |
| 20 | BadgerLite | 0.5485 | (0.5184, 0.5772) | 0.3840 | (0.3476, 0.4192) | 0.5247 | (0.4936, 0.5545) | graph_category graph_category_2 graph_scatterplot |
| 21 | METEOR-ranking | 0.8395 | (0.8267, 0.8515) | 0.6403 | (0.6148, 0.6644) | 0.8297 | (0.8162, 0.8424) | graph_category graph_category_2 graph_scatterplot |
| 22 | LET | 0.7989 | (0.7832, 0.8136) | 0.5960 | (0.5683, 0.6224) | 0.7945 | (0.7785, 0.8095) | graph_category graph_category_2 graph_scatterplot |
| 23 | DP-Or | 0.4909 | (0.4583, 0.5221) | 0.3392 | (0.3015, 0.3758) | 0.5170 | (0.4855, 0.5471) | graph_category graph_category_2 graph_scatterplot |
| 24 | ATEC3 | 0.6671 | (0.6432, 0.6898) | 0.4881 | (0.4554, 0.5194) | 0.6345 | (0.6087, 0.6589) | graph_category graph_category_2 graph_scatterplot |
| 25 | BLEU-v12 | 0.7772 | (0.7600, 0.7933) | 0.5754 | (0.5466, 0.6029) | 0.7513 | (0.7325, 0.7691) | graph_category graph_category_2 graph_scatterplot |
| 26 | BEwT-E | 0.6528 | (0.6280, 0.6763) | 0.4839 | (0.4511, 0.5154) | 0.6482 | (0.6231, 0.6719) | graph_category graph_category_2 graph_scatterplot |
| 27 | RTE | 0.6553 | (0.6306, 0.6786) | 0.4698 | (0.4364, 0.5019) | 0.6102 | (0.5831, 0.6359) | graph_category graph_category_2 graph_scatterplot |
| 28 | DR-Or | 0.4088 | (0.3732, 0.4432) | 0.2780 | (0.2388, 0.3163) | 0.4211 | (0.3860, 0.4551) | graph_category graph_category_2 graph_scatterplot |
| 29 | BleuSP | 0.7892 | (0.7729, 0.8046) | 0.5876 | (0.5594, 0.6145) | 0.7637 | (0.7456, 0.7807) | graph_category graph_category_2 graph_scatterplot |
| 30 | SVM-Rank | 0.7935 | (0.7775, 0.8086) | 0.5940 | (0.5661, 0.6205) | 0.7788 | (0.7618, 0.7948) | graph_category graph_category_2 graph_scatterplot |
| 31 | BLEU-1 | 0.8034 | (0.7880, 0.8178) | 0.6024 | (0.5750, 0.6285) | 0.8054 | (0.7901, 0.8197) | graph_category graph_category_2 graph_scatterplot |
| 32 | Bleu-sbp | 0.7782 | (0.7611, 0.7942) | 0.5769 | (0.5482, 0.6042) | 0.7522 | (0.7334, 0.7699) | graph_category graph_category_2 graph_scatterplot |
| 33 | invWer | -0.8048 | (-0.8191, -0.7895) | -0.6087 | (-0.6345, -0.5816) | -0.7907 | (-0.8059, -0.7744) | graph_category graph_category_2 graph_scatterplot |
| 34 | BLEU-v11b | 0.7721 | (0.7546, 0.7885) | 0.5711 | (0.5421, 0.5987) | 0.7470 | (0.7279, 0.7650) | graph_category graph_category_2 graph_scatterplot |
| 35 | SR-Or | 0.4532 | (0.4191, 0.4859) | 0.3049 | (0.2664, 0.3425) | 0.4856 | (0.4529, 0.5171) | graph_category graph_category_2 graph_scatterplot |
| 36 | Badger | 0.5339 | (0.5032, 0.5633) | 0.3745 | (0.3378, 0.4100) | 0.5176 | (0.4861, 0.5477) | graph_category graph_category_2 graph_scatterplot |
| 37 | Meteor-v0.7 | 0.8415 | (0.8288, 0.8533) | 0.6425 | (0.6171, 0.6665) | 0.8391 | (0.8262, 0.8511) | graph_category graph_category_2 graph_scatterplot |
| 38 | MaxSim | 0.5252 | (0.4941, 0.5549) | 0.3635 | (0.3265, 0.3994) | 0.5132 | (0.4815, 0.5434) | graph_category graph_category_2 graph_scatterplot |
| 39 | TERp | -0.8136 | (-0.8273, -0.7989) | -0.6178 | (-0.6432, -0.5912) | -0.8061 | (-0.8203, -0.7909) | graph_category graph_category_2 graph_scatterplot |
39 metrics (including 7 baseline metrics)
2179 data points (total number of documents used)
|
Multiple References Track
| 1 | SEPIA2 | 0.8319 | (0.8149, 0.8475) | 0.6423 | (0.6102, 0.6723) | 0.8089 | (0.7898, 0.8264) | graph_category graph_category_2 graph_scatterplot |
| 2 | CDer | -0.8510 | (-0.8650, -0.8358) | -0.6603 | (-0.6891, -0.6294) | -0.8585 | (-0.8718, -0.8439) | graph_category graph_category_2 graph_scatterplot |
| 3 | ULCh | -0.0048 | (-0.0577, 0.0481) | 0.0037 | (-0.0492, 0.0565) | 0.0158 | (-0.0371, 0.0686) | graph_category graph_category_2 graph_scatterplot |
| 4 | TER-v0.7.25 | -0.7623 | (-0.7836, -0.7392) | -0.5680 | (-0.6028, -0.5311) | -0.7627 | (-0.7839, -0.7396) | graph_category graph_category_2 graph_scatterplot |
| 5 | DP-Orp | -0.0558 | (-0.1084, -0.0030) | -0.0302 | (-0.0829, 0.0227) | -0.0966 | (-0.1487, -0.0439) | graph_category graph_category_2 graph_scatterplot |
| 6 | NIST-v11b | 0.8446 | (0.8287, 0.8591) | 0.6548 | (0.6235, 0.6840) | 0.8413 | (0.8251, 0.8561) | graph_category graph_category_2 graph_scatterplot |
| 7 | ATEC4 | 0.6673 | (0.6369, 0.6957) | 0.5022 | (0.4616, 0.5408) | 0.6495 | (0.6179, 0.6791) | graph_category graph_category_2 graph_scatterplot |
| 8 | ATEC1 | 0.6746 | (0.6447, 0.7024) | 0.5105 | (0.4703, 0.5485) | 0.6513 | (0.6197, 0.6807) | graph_category graph_category_2 graph_scatterplot |
| 9 | SNR | 0.0470 | (-0.0059, 0.0996) | 0.0451 | (-0.0078, 0.0977) | -0.0022 | (-0.0551, 0.0507) | graph_category graph_category_2 graph_scatterplot |
| 10 | mBLEU | 0.6848 | (0.6556, 0.7119) | 0.4972 | (0.4563, 0.5360) | 0.6626 | (0.6319, 0.6913) | graph_category graph_category_2 graph_scatterplot |
| 11 | 4-GRR | 0.7388 | (0.7138, 0.7619) | 0.5497 | (0.5117, 0.5856) | 0.7484 | (0.7242, 0.7708) | graph_category graph_category_2 graph_scatterplot |
| 12 | ATEC2 | 0.6697 | (0.6395, 0.6979) | 0.5062 | (0.4658, 0.5445) | 0.6465 | (0.6146, 0.6762) | graph_category graph_category_2 graph_scatterplot |
| 13 | SEPIA1 | 0.8312 | (0.8141, 0.8468) | 0.6389 | (0.6066, 0.6692) | 0.8245 | (0.8068, 0.8407) | graph_category graph_category_2 graph_scatterplot |
| 14 | ULCopt | 0.0226 | (-0.0303, 0.0754) | 0.0252 | (-0.0277, 0.0780) | 0.0012 | (-0.0516, 0.0541) | graph_category graph_category_2 graph_scatterplot |
| 15 | EDPM | 0.8270 | (0.8095, 0.8430) | 0.6335 | (0.6008, 0.6641) | 0.8335 | (0.8166, 0.8489) | graph_category graph_category_2 graph_scatterplot |
| 16 | mTER | -0.6461 | (-0.6759, -0.6142) | -0.4594 | (-0.5001, -0.4166) | -0.5501 | (-0.5860, -0.5122) | graph_category graph_category_2 graph_scatterplot |
| 17 | BLEU-4 | 0.7810 | (0.7595, 0.8008) | 0.5891 | (0.5535, 0.6226) | 0.7883 | (0.7674, 0.8075) | graph_category graph_category_2 graph_scatterplot |
| 18 | METEOR-v0.6 | 0.7736 | (0.7514, 0.7940) | 0.5809 | (0.5448, 0.6149) | 0.7872 | (0.7662, 0.8065) | graph_category graph_category_2 graph_scatterplot |
| 19 | BadgerLite | 0.1815 | (0.1299, 0.2321) | 0.1245 | (0.0721, 0.1762) | 0.1781 | (0.1264, 0.2288) | graph_category graph_category_2 graph_scatterplot |
| 20 | METEOR-ranking | 0.8211 | (0.8031, 0.8376) | 0.6279 | (0.5948, 0.6589) | 0.8338 | (0.8169, 0.8492) | graph_category graph_category_2 graph_scatterplot |
| 21 | LET | 0.8014 | (0.7816, 0.8195) | 0.6050 | (0.5704, 0.6375) | 0.8024 | (0.7827, 0.8205) | graph_category graph_category_2 graph_scatterplot |
| 22 | DP-Or | 0.0236 | (-0.0293, 0.0763) | 0.0229 | (-0.0300, 0.0756) | 0.1104 | (0.0578, 0.1623) | graph_category graph_category_2 graph_scatterplot |
| 23 | ATEC3 | 0.6844 | (0.6552, 0.7115) | 0.5191 | (0.4794, 0.5567) | 0.6425 | (0.6104, 0.6726) | graph_category graph_category_2 graph_scatterplot |
| 24 | BLEU-v12 | 0.7829 | (0.7615, 0.8025) | 0.5908 | (0.5553, 0.6242) | 0.7874 | (0.7665, 0.8067) | graph_category graph_category_2 graph_scatterplot |
| 25 | BEwT-E | 0.7706 | (0.7482, 0.7912) | 0.5781 | (0.5418, 0.6123) | 0.7650 | (0.7422, 0.7861) | graph_category graph_category_2 graph_scatterplot |
| 26 | DR-Or | 0.0145 | (-0.0384, 0.0673) | 0.0169 | (-0.0360, 0.0697) | 0.0004 | (-0.0525, 0.0532) | graph_category graph_category_2 graph_scatterplot |
| 27 | BleuSP | 0.8045 | (0.7850, 0.8224) | 0.6100 | (0.5757, 0.6422) | 0.8111 | (0.7922, 0.8284) | graph_category graph_category_2 graph_scatterplot |
| 28 | SVM-Rank | 0.7894 | (0.7687, 0.8086) | 0.5987 | (0.5637, 0.6316) | 0.8008 | (0.7810, 0.8190) | graph_category graph_category_2 graph_scatterplot |
| 29 | BLEU-1 | 0.8180 | (0.7998, 0.8348) | 0.6254 | (0.5921, 0.6566) | 0.8169 | (0.7985, 0.8337) | graph_category graph_category_2 graph_scatterplot |
| 30 | Bleu-sbp | 0.7870 | (0.7660, 0.8063) | 0.5954 | (0.5602, 0.6285) | 0.7924 | (0.7718, 0.8112) | graph_category graph_category_2 graph_scatterplot |
| 31 | invWer | -0.8275 | (-0.8435, -0.8101) | -0.6341 | (-0.6647, -0.6014) | -0.8291 | (-0.8450, -0.8119) | graph_category graph_category_2 graph_scatterplot |
| 32 | BLEU-v11b | 0.7806 | (0.7591, 0.8005) | 0.5887 | (0.5530, 0.6222) | 0.7866 | (0.7656, 0.8060) | graph_category graph_category_2 graph_scatterplot |
| 33 | SR-Or | 0.0167 | (-0.0362, 0.0695) | 0.0168 | (-0.0361, 0.0696) | 0.0606 | (0.0078, 0.1131) | graph_category graph_category_2 graph_scatterplot |
| 34 | Badger | 0.1593 | (0.1073, 0.2104) | 0.1103 | (0.0578, 0.1622) | 0.1683 | (0.1165, 0.2192) | graph_category graph_category_2 graph_scatterplot |
| 35 | Meteor-v0.7 | 0.8368 | (0.8203, 0.8520) | 0.6454 | (0.6135, 0.6752) | 0.8489 | (0.8334, 0.8630) | graph_category graph_category_2 graph_scatterplot |
| 36 | MaxSim | 0.1182 | (0.0657, 0.1700) | 0.0916 | (0.0389, 0.1438) | 0.1344 | (0.0821, 0.1859) | graph_category graph_category_2 graph_scatterplot |
| 37 | TERp | -0.8070 | (-0.8247, -0.7878) | -0.6166 | (-0.6483, -0.5827) | -0.8095 | (-0.8270, -0.7905) | graph_category graph_category_2 graph_scatterplot |
37 metrics (including 7 baseline metrics)
1375 data points (total number of documents used)
|