/*****************************************************************
 EVAL_IRST_SP_LOC    version 2.1    18/01/2005

 This program evaluates the performance of a Speaker Localization system

 The program was developed by ITC-irst under the EU project CHIL

 This software is provided "AS IS" and without warranty of any kind.
 This program can be redistributed and/or modified freely provided that
 any derivative works bear some notice that they are derived from it,
 and any modified versions bear some notice that they have been further
 modified from the original.

 Copyright © 2004-2005 by ITC-irst. All rights reserved.

 NOTICE: this file has been modified from the original ITC-irst release.
 ********************************************************************

 The program "EVAL_IRST_SP_LOC" reads the results produced by a source
 localization procedure and compares them with a reference provided as
 result of manual transcription and labeling of data.

 Usage:
   ./EVAL_IRST_SP_LOC -reference -inputFile -evalOutput -evalSummary
                      -thresholdLecturer -thresholdAudience -timestep -maxN

 There are no default parameters: all the options need to be provided
 with proper values on the command line.

 This version can process only data concerning the LECTURER scenario.
 In the reference data speaker_ID must be either "lecturer" or "audience"
 (or "-" if the number of speakers is not 1).

 This is only a preliminary version that will be subject to future
 improvements.

 Example of usage:
   EVAL_IRST_SP_LOC -reference ref.txt -inputFile input.txt
                    -evalOutput output.txt -evalSummary summary.txt
                    -thresholdLecturer 500 -thresholdAudience 1000
                    -timestep 0.667 -maxN 0

 For more details, see the document
 CHIL-IRST_SpeakerLocEval-V5.0-2005-01-18
 *****************************************************************/

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of command-line options; every one of them is mandatory. */
#define NUM_PARAMETERS 8

/* Struct for the reference table: one row per line of the reference file */
typedef struct {
    float time;      /* frame time stamp */
    int nSpeakers;   /* number of active speakers in the frame */
    int nNoises;     /* number of noise sources in the frame */
    char *ID;        /* "lecturer", "audience" or "-" */
    float x;         /* reference position */
    float y;
    float z;
} refTable;

/* Struct for the input table: one localization estimate */
typedef struct {
    float time;
    float x;
    float y;
    float z;
} inputTable;

/* Enumeration for the error types */
typedef enum {
    fel,    /* Fine Error Lecturer */
    gel,    /* Gross Error Lecturer */
    dl,     /* Deletion Lecturer */
    fea,    /* Fine Error Audience */
    gea,    /* Gross Error Audience */
    da,     /* Deletion Audience */
    ns,     /* No Speaker */
    fa,     /* False Alarm */
    ms      /* Ignored, Multiple Speakers */
} errorType;

/* Struct for the output table: one row per reference frame */
typedef struct {
    float time;                 /* time stamp copied from the reference row */
    float error;                /* localization error, or -1 when not defined */
    errorType classification;
    float x;                    /* per-axis difference: estimate minus reference */
    float y;
    float z;
} outputTable;

/* Local boolean type (this file does not use <stdbool.h>). */
typedef enum {
    false,
    true
} bool;

/* Half of the time step: tolerance used when matching estimates to reference frames. */
float epsilon;
/* Largest number of noise sources for which a reference frame is still scored. */
int maxnoiseN;
/* Number of data lines read from the reference file. */
int n_ref_frames = 0;

/*
 * Print the list of command-line options to stdout.
 *
 * name: program name shown on the first line.
 *
 * NOTE(review): every option line ends in a bare space before the newline;
 * argument placeholders may have been lost from these strings -- confirm
 * against the original distribution before changing them.
 */
void usage(char *name)
{
    fprintf(stdout, "Usage: %s\n", name);
    fprintf(stdout, "\t-reference \n");
    fprintf(stdout, "\t-inputFile \n");
    fprintf(stdout, "\t-evalOutput \n");
    fprintf(stdout, "\t-evalSummary \n");
    fprintf(stdout, "\t-thresholdLecturer \n");
    fprintf(stdout, "\t-thresholdAudience \n");
    fprintf(stdout, "\t-timestep \n");
    fprintf(stdout, "\t-maxN \n");
}

/*
 * Report on stdout that one mandatory option is missing.
 *
 * name:             program name.
 * missingParameter: option name without the leading dash.
 */
void usageSmart(char *name, char *missingParameter)
{
    fprintf(stdout, "\n Error using %s: a parameter is missing, specify -%s\n\n",
            name, missingParameter);
}

/*
 * Something useful for debug: dump an input table to the file "test" in the
 * current directory, one "time x y z" line per row.
 *
 * iTable:  array of iLength row pointers.
 * iLength: number of rows to print.
 *
 * If the file cannot be opened a message is written to stderr and nothing
 * is dumped.
 */
void printInputTable(inputTable **iTable, int iLength)
{
    FILE *f;
    int i;

    f = fopen("test", "w");
    if (f == NULL) {
        fprintf(stderr, "Cannot open debug file \"test\" for writing\n");
        return;
    }

    for (i = 0; i < iLength; i++) {
        fprintf(f, " %.2f\t%.2f\t%.2f\t%.2f\n",
                iTable[i]->time, iTable[i]->x, iTable[i]->y, iTable[i]->z);
    }

    fclose(f);
}
void readRefTable(char *fileName, refTable ***table, int *length, float *Lasttime) { FILE *f; int lineSize; char *line, tmpstring[100]; int i; float InitialTime=-1.0; refTable **appTable; refTable *row; lineSize = 256; line = (char *) calloc(lineSize, sizeof(char)); appTable = NULL; i = 0; if ((f = fopen(fileName, "r")) == NULL) { fprintf(stderr,"File %s not found!", fileName); exit(-1); } /* Discard the first line, not useful */ fgets(line, lineSize, f); sscanf(line, "%*s %*s %*s %s", tmpstring); if(strcmp(tmpstring, "lecturer")&&strcmp(tmpstring, "audience")&&strcmp(tmpstring, "-")) { /* Header line found in reference file */ } else rewind(f); while (fgets(line, lineSize, f)) { n_ref_frames++; row = (refTable *) calloc(1,sizeof(refTable)); row->ID = (char *) calloc(50,sizeof(char)); appTable = (refTable **) realloc(appTable, (i + 1) * sizeof(refTable*)); /* Be careful with the possible strings... */ sscanf(line, "%f %d %d %s %f %f %f", &row->time, &row->nSpeakers, &row->nNoises, row->ID, &row->x, &row->y, &row->z); if(InitialTime==-1.0) InitialTime=row->time; if(strcmp(row->ID, "lecturer")&&strcmp(row->ID, "audience")&&strcmp(row->ID, "-")) { fprintf(stderr,"This program can process only data concerning the LECTURER scenario.\nSpeaker_ID must be either \"lecturer\" or \"audience\" \n(or \"-\" if the number of speakers is not 1).\n\n"); exit(1); } appTable[i++] = row; } free(line); *length = i; *table = appTable; fclose(f); *Lasttime=row->time-InitialTime; } void readInputTable(char *fileName, inputTable ***table, int *length, int *Tframes, float timeStep) { FILE *f; fpos_t lastPosition; /* Ptr to last item found to make the search clever */ int lineSize; char *line; int i; float timeStamp; int nEvents; inputTable **appTable; inputTable *row; inputTable *formattedRow; lineSize = 256; line = (char *) calloc(lineSize, sizeof(char)); formattedRow = (inputTable *) calloc(1,sizeof(inputTable)); appTable = NULL; i = 0; nEvents = 0; timeStamp=0; if ((f = fopen(fileName, 
"r")) == NULL) if (!f) { fprintf(stderr,"File %s not found!", fileName); exit(-1); } /* Discard the first line, not useful */ fgets(line, lineSize, f); if(strstr(line,"X") || strstr(line,"x")) { /* Header line found in input file */ } else rewind(f); fgetpos (f, &lastPosition); *Tframes=0; while (fgets(line, lineSize, f)) { row = (inputTable *) calloc(1,sizeof(inputTable)); sscanf(line, "%f %f %f %f", &row->time, &row->x, &row->y, &row->z); if(row->time < (timeStamp + timeStep / 2 ) ) { fgetpos(f, &lastPosition); formattedRow->x = formattedRow->x + row->x; formattedRow->y = formattedRow->y + row->y; formattedRow->z = formattedRow->z + row->z; nEvents = nEvents + 1; } else { if(nEvents != 0) { formattedRow->x = formattedRow->x/nEvents; formattedRow->y = formattedRow->y/nEvents; formattedRow->z = formattedRow->z/nEvents; formattedRow->time = timeStamp; appTable = (inputTable **) realloc(appTable, (i + 1) * sizeof(inputTable*)); appTable[i++] = formattedRow; (*Tframes)+=nEvents; } timeStamp = timeStamp + timeStep; nEvents = 0; formattedRow = (inputTable *) calloc(1,sizeof(inputTable)); fsetpos (f,&lastPosition); } } if(nEvents != 0) { formattedRow->x=formattedRow->x / nEvents; formattedRow->y=formattedRow->y / nEvents; formattedRow->z=formattedRow->z / nEvents; formattedRow->time = timeStamp; appTable = (inputTable **) realloc(appTable, (i + 1) * sizeof(inputTable*)); appTable[i++] = formattedRow; } fclose(f); free(line); *length = i; *table = appTable; (*Tframes)+=nEvents; } float distance(int x1, int y1, int z1, int x2, int y2, int z2) { return sqrt((x1 - x2) * (x1 - x2) + (y1 - y2) * (y1 - y2) + (z1 - z2) * (z1 - z2)); } outputTable **createResponse(refTable **rTable, int rLength, inputTable **iTable, int iLength, int thresholdLecturer, int thresholdAudience) { int i, j, last; outputTable **table; outputTable *row; float error; last = 0; /* Last element visited in the input table */ /* The output table has the same number of lines of the reference table */ table = 
(outputTable **) calloc(rLength, sizeof(outputTable*)); for (i = 0; i < rLength; i++) { row = (outputTable *) calloc(1,sizeof(outputTable)); row->time = rTable[i]->time; for (j = last; j < iLength; j++) { /* Look for the same time stamp */ if ((rTable[i]->time - epsilon) <= iTable[j]->time && (rTable[i]->time + epsilon) > iTable[j]->time) { if (rTable[i]->nSpeakers == 0) { row->classification = fa; row->error = -1; break; } if ((rTable[i]->nSpeakers > 1)||(rTable[i]->nNoises > maxnoiseN)) { row->classification = ms; row->error = -1; break; } /* Distance computation */ row->x = (iTable[j]->x) - (rTable[i]->x); row->y = (iTable[j]->y) - (rTable[i]->y); row->z = (iTable[j]->z) - (rTable[i]->z); error = distance(rTable[i]->x, rTable[i]->y, rTable[i]->z, iTable[j]->x, iTable[j]->y, iTable[j]->z); row->error = error; if (!strcmp(rTable[i]->ID, "lecturer")) { if(error <= thresholdLecturer) row->classification = fel; else row->classification = gel; } if (!strcmp(rTable[i]->ID, "audience")) { if(error <= thresholdAudience) row->classification = fea; else row->classification = gea; } last = j; break; } } if (j == iLength) { /* Element non found in input file */ row->error = -1; /* For ND value */ if (rTable[i]->nSpeakers == 0) { /* No speakers */ row->classification = ns; } else if ((rTable[i]->nSpeakers == 1) &&(rTable[i]->nNoises == 0)) { /* Deletion */ if (!strcmp(rTable[i]->ID, "lecturer")) row->classification = dl; else row->classification = da; } else /* in any other situation we have to neglect the fact that the loc. 
system did not locate anything */ { /* Multiple speakers */ row->classification = ms; } } table[i] = row; /* Save the line in the output table */ } return table; } void printOutputTable(outputTable **oTable, int rLength, char *evalOutput, float timeStep) { FILE *f; int i; if ((f = fopen(evalOutput,"w")) == NULL) { printf("\n Open Error for %s\n", evalOutput); exit(1); } fprintf(f, "%s\t%s\t%s\n", "Frame_Time[s]", "Error[mm]", "Classification"); for (i = 0; i < rLength; i++) { fprintf(f, " %.2f\t\t", oTable[i]->time); if(oTable[i]->error == -1) fprintf(f, "ND\t\t"); else fprintf(f,"%.0f\t\t",oTable[i]->error); switch (oTable[i]->classification) { case fel: fprintf(f, "Fine Error Lecturer\n"); break; case gel: fprintf(f, "Gross Error Lecturer\n"); break; case dl: fprintf(f, "Deletion Lecturer\n"); break; case fea: fprintf(f, "Fine Error Audience\n"); break; case gea: fprintf(f, "Gross Error Audience\n"); break; case ns: fprintf(f, "No Speaker\n"); break; case da: fprintf(f, "Deletion Audience\n"); break; case fa: fprintf(f, "False Alarm\n"); break; case ms: fprintf(f, "Ignored (Multiple Speakers)\n"); break; } } } void createSummary(outputTable **oTable, int rLength, int totframes, float lasttime, char *evalSummary) { float xBias_fine[2]; float yBias_fine[2]; float zBias_fine[2]; float xBias_overall[2]; float yBias_overall[2]; float zBias_overall[2]; float FalseAlarm = 0; float nNoSpeaker = 0; float RMSE_fine[2]; float RMSE_overall[2]; float nCorLoc[2]; float deletion[2]; float nLocation[2]; int i; FILE *f; if ((f = fopen(evalSummary,"w")) == NULL) { printf("\n Open Error for %s\n", evalSummary); exit(1); } for (i = 0; i < 2; i++) { xBias_fine[i] = 0; yBias_fine[i] = 0; zBias_fine[i] = 0; xBias_overall[i] = 0; yBias_overall[i] = 0; zBias_overall[i] = 0; nCorLoc[i] = 0; nLocation[i] = 0; RMSE_fine[i] = 0; RMSE_overall[i] = 0; deletion[i] = 0; } for (i = 0; i < rLength; i++) { if (oTable[i]->classification == fel) { nCorLoc[0] = nCorLoc[0] + 1; RMSE_fine[0] = 
RMSE_fine[0] + (oTable[i]->error * oTable[i]->error); xBias_fine[0] = xBias_fine[0] + oTable[i]->x; yBias_fine[0] = yBias_fine[0] + oTable[i]->y; zBias_fine[0] = zBias_fine[0] + oTable[i]->z; } if (oTable[i]->classification == gel || oTable[i]->classification == fel) { nLocation[0] = nLocation[0] + 1; RMSE_overall[0] = RMSE_overall[0] + (oTable[i]->error * oTable[i]->error); xBias_overall[0] = xBias_overall[0] + oTable[i]->x; yBias_overall[0] = yBias_overall[0] + oTable[i]->y; zBias_overall[0] = zBias_overall[0] + oTable[i]->z; } if (oTable[i]->classification == dl) deletion[0] = deletion[0] + 1; if (oTable[i]->classification == fea) { nCorLoc[1] = nCorLoc[1] + 1; RMSE_fine[1] = RMSE_fine[1] + (oTable[i]->error * oTable[i]->error); xBias_fine[1] = xBias_fine[1] + oTable[i]->x; yBias_fine[1] = yBias_fine[1] + oTable[i]->y; zBias_fine[1] = zBias_fine[1] + oTable[i]->z; } if (oTable[i]->classification == gea || oTable[i]->classification == fea) { nLocation[1] = nLocation[1] + 1; RMSE_overall[1] = RMSE_overall[1] + (oTable[i]->error * oTable[i]->error); xBias_overall[1] = xBias_overall[1] + oTable[i]->x; yBias_overall[1] = yBias_overall[1] + oTable[i]->y; zBias_overall[1] = zBias_overall[1] + oTable[i]->z; } if (oTable[i]->classification == da) deletion[1] = deletion[1] + 1; if (oTable[i]->classification == fa) FalseAlarm = FalseAlarm + 1; if (oTable[i]->classification == ns) nNoSpeaker = nNoSpeaker + 1; } fprintf(f, "%s\t\t\t\t%s\t\t%s\t\t%s\n", "", "Lecturer", "Audience", "Overall"); fprintf(f, "%s\t\t\t\t%.2f\t\t\t%.2f\t\t\t%.2f\n", "Pcor", nCorLoc[0] / nLocation[0],nCorLoc[1] / nLocation[1],(nCorLoc[0]+nCorLoc[1])/(nLocation[0]+nLocation[1]) ); fprintf(f, "%s\t\t(%.0f,%.0f,%.0f)\t\t(%.0f,%.0f,%.0f)\t\t(%.0f,%.0f,%.0f)\n", "Bias fine (x,y,z)[mm]", xBias_fine[0] / nCorLoc[0], yBias_fine[0] / nCorLoc[0], zBias_fine[0] / nCorLoc[0], xBias_fine[1] / nCorLoc[1], yBias_fine[1] / nCorLoc[1], zBias_fine[1] / nCorLoc[1], (xBias_fine[0]+xBias_fine[1])/(nCorLoc[0]+nCorLoc[1]), 
(yBias_fine[0]+yBias_fine[1])/(nCorLoc[0]+nCorLoc[1]), (zBias_fine[0]+zBias_fine[1])/(nCorLoc[0]+nCorLoc[1])); fprintf(f, "%s\t(%.0f,%.0f,%.0f)\t\t(%.0f,%.0f,%.0f)\t\t(%.0f,%.0f,%.0f)\n", "Bias fine+gross (x,y,z)[mm]", xBias_overall[0] / nLocation[0], yBias_overall[0] / nLocation[0], zBias_overall[0] / nLocation[0], xBias_overall[1] / nLocation[1], yBias_overall[1] / nLocation[1], zBias_overall[1] / nLocation[1], (xBias_overall[0]+xBias_overall[1])/(nLocation[0]+nLocation[1]), (yBias_overall[0]+yBias_overall[1])/(nLocation[0]+nLocation[1]), (zBias_overall[0]+zBias_overall[1])/(nLocation[0]+nLocation[1])); fprintf(f, "%s\t\t\t%.0f\t\t\t%.0f\t\t\t%.0f\n", "RMSE fine [mm]", sqrt(RMSE_fine[0] / nCorLoc[0]), sqrt(RMSE_fine[1] / nCorLoc[1]), sqrt((RMSE_fine[0]+RMSE_fine[1])/(nCorLoc[0]+nCorLoc[1]))); fprintf(f, "%s\t\t%.0f\t\t\t%.0f\t\t\t%.0f\n","RMSE fine+gross [mm]", sqrt(RMSE_overall[0] / nLocation[0]), sqrt(RMSE_overall[1] / nLocation[1]), sqrt((RMSE_overall[0]+RMSE_overall[1])/(nLocation[0]+nLocation[1]))); fprintf(f, "%s\t\t\t%.2f\t\t\t%.2f\t\t\t%.2f\n", "Deletion rate", deletion[0] / (nLocation[0] + deletion[0]), deletion[1] / (nLocation[1] + deletion[1]), (deletion[0]+deletion[1])/(nLocation[0] + deletion[0]+nLocation[1] + deletion[1]) ); fprintf(f, "%s\t\t\t\t\t\t\t\t%.2f\n", "False Alarm rate", FalseAlarm / (FalseAlarm + nNoSpeaker)); fprintf(f, "%s %.0f\t\t%.0f\t\t%.0f\n","Loc. frames for error statistics",nLocation[0],nLocation[1],(nLocation[0]+nLocation[1])); fprintf(f, "Total n. of output loc.frames=%d\t Reference Duration=%.2f\t Average Frames/sec=%.2f\n",totframes,lasttime,totframes/lasttime); fprintf(f, "Total n. 
reference frames=%d\t\n",n_ref_frames); } int main(int argc, char **argv) { char *reference; char *inputFile; char *evalOutput; char *evalSummary; char *thresholdLecturer_str; int thresholdLecturer; char *thresholdAudience_str; int thresholdAudience; char *timeStep_str; double timeStep=0.1; /*default value */ char *max_nNoises_str; int max_nNoises; char *pName; char *pch; refTable **rTable; inputTable **iTable; outputTable **oTable; int rLength; int iLength; int totalframes; float Ltime; int i; int numberParameterMiss; char *parameterName[NUM_PARAMETERS]; bool parameterFlag[NUM_PARAMETERS]; for (i = 0; i < NUM_PARAMETERS; i++) { parameterFlag[i] = false; parameterName[i] = (char *) calloc(512, sizeof(char)); } numberParameterMiss = 0; parameterName[0] = "reference"; parameterName[1] = "inputFile"; parameterName[2] = "evalOutput"; parameterName[3] = "evalSummary"; parameterName[4] = "thresholdLecturer"; parameterName[5] = "thresholdAudience"; parameterName[6] = "timestep"; parameterName[7] = "maxN"; pName = argv[0]; if (argc > 2 * NUM_PARAMETERS + 1) { fprintf(stdout, "%s : too many parameters\n\n", pName); usage(pName); return 0; } for (--argc, ++argv; argc && **argv == '-';) { char *arg; arg = *argv++; --argc; if (!strcmp(arg, "--")) { break; } else if (!strcmp(arg, "-reference")) { if (argc--) { reference = strdup(*argv++); pch = strchr(reference, '-'); if (pch == NULL) parameterFlag[0] = true; } } else if (!strcmp(arg, "-inputFile")) { if (argc--) { inputFile = strdup(*argv++); pch = strchr(inputFile, '-'); if (pch == NULL) parameterFlag[1] = true; } } else if (!strcmp(arg, "-evalOutput")) { if (argc--) { evalOutput = strdup(*argv++); pch = strchr(evalOutput, '-'); if (pch == NULL) parameterFlag[2] = true; } } else if (!strcmp(arg, "-evalSummary")) { if (argc--) { evalSummary = strdup(*argv++); pch = strchr(evalSummary, '-'); if (pch == NULL) parameterFlag[3] = true; } } else if (!strcmp(arg, "-thresholdLecturer")) { if (argc--) { thresholdLecturer_str = 
strdup(*argv++); pch = strchr(thresholdLecturer_str, '-'); thresholdLecturer = atoi(thresholdLecturer_str); if (pch == NULL) parameterFlag[4] = true; } } else if (!strcmp(arg, "-thresholdAudience")) { if (argc--) { thresholdAudience_str = strdup(*argv++); pch = strchr(thresholdAudience_str, '-'); thresholdAudience = atoi(thresholdAudience_str); if (pch == NULL) parameterFlag[5] = true; } } else if (!strcmp(arg, "-timestep")) { if (argc--) { timeStep_str = strdup(*argv++); pch = strchr(timeStep_str, '-'); timeStep = atof(timeStep_str); if (pch == NULL) parameterFlag[6] = true; epsilon=timeStep/2; } } else if (!strcmp(arg, "-maxN")) { if (argc--) { max_nNoises_str = strdup(*argv++); pch = strchr(max_nNoises_str, '-'); max_nNoises = atoi(max_nNoises_str); if (pch == NULL) parameterFlag[7] = true; maxnoiseN=max_nNoises; } } else fprintf(stderr, "Invalid option %s\n", arg); if (argc < 0) break; } for (i = 0; i < NUM_PARAMETERS; i++) { if (parameterFlag[i] == false) { usageSmart(pName, parameterName[i]); numberParameterMiss++; } } if (numberParameterMiss != 0) { usage(pName); return 0; } fprintf(stdout, "\nRunning %s " , pName); fprintf(stdout, "\n\t-reference %s ", reference); fprintf(stdout, "\n\t-inputFile %s ", inputFile); fprintf(stdout, "\n\t-evalOutput %s ", evalOutput); fprintf(stdout, "\n\t-evalSummary %s ", evalSummary); fprintf(stdout, "\n\t-thresholdLecturer %d ", thresholdLecturer); fprintf(stdout, "\n\t-thresholdAudience %d ", thresholdAudience); fprintf(stdout, "\n\t-timestep %f ", timeStep); fprintf(stdout, "\n\t-maxN %d ", max_nNoises); fprintf(stdout, "\n\n"); /* Read the reference table */ readRefTable(reference, &rTable, &rLength, &Ltime); /* Read the input table */ readInputTable(inputFile, &iTable, &iLength, &totalframes, timeStep); /* Creation of the output table */ oTable = createResponse(rTable, rLength, iTable, iLength, thresholdLecturer, thresholdAudience); /* Print on file the output table */ printOutputTable(oTable, rLength, evalOutput, 
timeStep); /* Creation and printing on file of the summary */ createSummary(oTable, rLength, totalframes, Ltime, evalSummary); return 0; }