Main Page · Modules · All Classes · Class Hierarchy
MASoundBasicAnalyzer.cpp
1 /*
2  * This file is part of the AiBO+ project
3  *
4  * Copyright (C) 2005-2016 Csaba Kertész (csaba.kertesz@gmail.com)
5  *
6  * AiBO+ is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * AiBO+ is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
19  *
20  */
21 
22 #include "MASoundBasicAnalyzer.hpp"
23 
24 #include "core/MANum.hpp"
25 #include "MASoundAnalyzerPrivate.hpp"
26 #include "MASoundData.hpp"
27 
28 #include <MCSampleStatistics.hpp>
29 
30 #include <libxtract.h>
31 
// Number of MFCC components; the constructor also uses it as the filter count (n_filters) of the
// mel filter bank.
32 const int MASoundBasicAnalyzer::MfccCount = 26;
33 
34 MASoundBasicAnalyzer::MASoundBasicAnalyzer(unsigned int frequency, bool only_mfcc_components) : MAAnalyzer(),
35  Frequency((double)frequency), WindowSize((int)MASoundData::GetWindowSize(frequency)),
36  SlidingWindowLimit(0), HannWindow(nullptr), BarkBandLimits(nullptr), OnlyMfccCoefs(only_mfcc_components)
37 {
38  // Set up filter bank
39  Wrapper.reset(new FilterBankWrapper);
40  Wrapper->FilterBank = (xtract_mel_filter*)malloc(sizeof(xtract_mel_filter));
41  Wrapper->FilterBank->n_filters = MfccCount;
42  Wrapper->FilterBank->filters = (double**)malloc(Wrapper->FilterBank->n_filters*sizeof(double*));
43  for (int i = 0; i < Wrapper->FilterBank->n_filters; ++i)
44  {
45  Wrapper->FilterBank->filters[i] = (double*)malloc(WindowSize / 2*sizeof(double));
46  }
47  xtract_init_fft(WindowSize, XTRACT_SPECTRUM);
48  xtract_init_mfcc(WindowSize / 2, Frequency, XTRACT_EQUAL_AREA, 1, Frequency,
49  Wrapper->FilterBank->n_filters, Wrapper->FilterBank->filters);
50  // Set up Hann window
51  HannWindow = xtract_init_window(WindowSize, XTRACT_HANN);
52  // Init bark band limits
53  xtract_init_wavelet_f0_state();
54  BarkBandLimits = (int*)malloc(sizeof(int)*XTRACT_BARK_BANDS);
55  xtract_init_bark(WindowSize, Frequency, &BarkBandLimits[0]);
56 }
57 
58 
59 MASoundBasicAnalyzer::~MASoundBasicAnalyzer()
60 {
61  // Clean up filter bank
62  for (int i = 0; i < Wrapper->FilterBank->n_filters; ++i)
63  {
64  free(Wrapper->FilterBank->filters[i]);
65  }
66  free(Wrapper->FilterBank->filters);
67  free(Wrapper->FilterBank);
68  // Clean up other things
69  xtract_free_fft();
70  xtract_free_window(HannWindow);
71  HannWindow = nullptr;
72  free(BarkBandLimits);
73  BarkBandLimits = nullptr;
74 }
75 
76 
78 {
79  // Nothing to do
80 }
81 
82 
84 {
85  DataWindows.clear();
86  LastDeltaFrame.clear();
87  LastMmbsesDeltaFrame.clear();
88  FeatureVectors.clear();
89  Buffer.clear();
90  SlidingWindow.clear();
91 }
92 
93 
95 {
96  return FeatureVectors.size() > 3;
97 }
98 
99 
101 {
102  if (!IsValid())
103  return MC::FloatList();
104 
105  // Generate the horizontal features even if the sliding window is not full yet
106  if ((int)SlidingWindow.size() < SlidingWindowLimit-1)
107  {
108  for (unsigned int i = 0; i < FeatureVectors.size(); ++i)
110  }
111  MC::FloatList Temp = FeatureVectors[0];
112 
113  FeatureVectors.erase(FeatureVectors.begin());
114  return Temp;
115 }
116 
117 
// Get several feature vectors at once.
//
// Returns an empty table while IsValid() is false. Otherwise the first Count vectors of
// FeatureVectors are copied into the result and the first EraseCount vectors are erased
// from FeatureVectors.
//
// desired_vector_count: number of vectors to return; 0 selects every stored vector, any other
//                       value goes through MCMin() with FeatureVectors.size() (presumably the
//                       smaller of the two - confirm against MCMin()).
// erased_vector_count: number of vectors to erase from the front; 0 erases as many as were
//                      returned, any other value goes through MCMin() with the returned count.
118 MC::FloatTable MASoundBasicAnalyzer::GetFeatureVectors(unsigned int desired_vector_count,
119  unsigned int erased_vector_count)
120 {
121  if (!IsValid())
122  return MC::FloatTable();
123 
124  // Generate the horizontal features even if the sliding window is not full yet
125  if ((int)SlidingWindow.size() < SlidingWindowLimit-1)
126  {
 // NOTE(review): the statement run by this loop (listing line 128) is missing from this view
127  for (unsigned int i = 0; i < FeatureVectors.size(); ++i)
129  }
 // Number of vectors copied into the result
130  unsigned int Count = (desired_vector_count == 0 ? FeatureVectors.size() :
131  MCMin(desired_vector_count, (unsigned int)FeatureVectors.size()));
132  MC::FloatTable Temp(FeatureVectors.begin(), FeatureVectors.begin()+Count);
 // Number of vectors removed from the front of FeatureVectors
133  unsigned int EraseCount = (erased_vector_count == 0 ? Count : MCMin(erased_vector_count, Count));
134 
135  FeatureVectors.erase(FeatureVectors.begin(), FeatureVectors.begin()+EraseCount);
136  return Temp;
137 }
138 
139 
141 {
142  return FeatureVectors.size();
143 }
144 
145 
147 {
149 }
150 
151 
// Add sound data given as a list of doubles.
//
// raw_data is merged into Buffer with MCMergeContainers(). Nothing else happens while Buffer
// holds fewer than WindowSize samples. Otherwise Buffer is cut down to its last RemainingCount
// samples and DataWindows is cleared.
//
// NOTE(review): the embedded listing numbers jump over 160 and 163, so two statements of this
// function are missing from this view; the value of RemainingCount before the += below may
// depend on the first of them.
152 void MASoundBasicAnalyzer::AddSoundData(const MC::DoubleList& raw_data)
153 {
154  int RemainingCount = 0;
155 
156  MCMergeContainers(Buffer, raw_data);
 // Not enough samples for a single window yet
157  if ((int)Buffer.size() < WindowSize)
158  return;
159 
 // Two thirds of a window (truncated to int) are added to the number of samples to keep
161  RemainingCount += (int)((float)WindowSize*2 / 3);
 // Drop everything except the last RemainingCount samples
162  Buffer.erase(Buffer.begin(), Buffer.begin()+Buffer.size()-RemainingCount);
164  DataWindows.clear();
165 }
166 
167 
169 {
170 #if !defined(__AIBO_BUILD__)
171  if (DataWindows.size() < 3)
172  return;
173 
174  MC::DoubleTable MfccFrames;
175  MC::DoubleTable MmbsesFrames;
176  MC::DoubleTable MfccEnergyFrames;
177  MC::DoubleTable SpectrumFrames;
178  MC::DoubleTable DeltaFrames;
179  MC::DoubleTable MmbsesDeltaFrames;
180 
181  // Generate the mfcc frames
182  for (unsigned int i = 0; i < DataWindows.size(); ++i)
183  {
184  // Note: the UserData variable is modified for other function calls from libxtract
185  double UserData[4] = { (double)Frequency / WindowSize, XTRACT_MAGNITUDE_SPECTRUM, 0.0f, 0.0f};
186  double UserData2[4] = { (double)Frequency / WindowSize, XTRACT_SPECTRUM_COEFFICIENTS, 0.0f, 0.0f};
187  MC::DoubleList Temp;
188  MC::DoubleList MfccCoefficients;
189  MC::DoubleList MmbsesCoefficients;
190  MC::DoubleList MfccEnergies;
191  MC::DoubleList Spectrum(WindowSize, 0);
192 
193  Temp.resize(Wrapper->FilterBank->n_filters);
194  MfccCoefficients.resize(MfccCount);
195  MmbsesCoefficients.resize(MfccCount);
196  MfccEnergies.resize(MfccCount);
197  xtract_spectrum((double*)&(DataWindows[i][0]), WindowSize, UserData2, (double*)&(Spectrum[0]));
198  xtract_mmbses((double*)&(Spectrum[0]), WindowSize / 2, Wrapper->FilterBank, (double*)&Temp[0]);
199  memcpy(&MmbsesCoefficients[0], &Temp[0], sizeof(double)*MfccCount);
200  MmbsesFrames.push_back(MmbsesCoefficients);
201  xtract_spectrum((double*)&(DataWindows[i][0]), WindowSize, UserData, (double*)&(Spectrum[0]));
202  xtract_mfcc((double*)&(Spectrum[0]), WindowSize / 2, Wrapper->FilterBank, (double*)&Temp[0]);
203  memcpy(&MfccCoefficients[0], &Temp[0], sizeof(double)*MfccCount);
204  MfccFrames.push_back(MfccCoefficients);
205  xtract_mfcc_without_dct((double*)&(Spectrum[0]), WindowSize / 2, Wrapper->FilterBank, (double*)&Temp[0]);
206  memcpy(&MfccEnergies[0], &Temp[0], sizeof(double)*MfccCount);
207  MfccEnergyFrames.push_back(MfccEnergies);
208  SpectrumFrames.push_back(Spectrum);
209  }
210  // Generate MFCC delta features
211  for (unsigned int i = 0; i < MfccFrames.size(); ++i)
212  {
213  MC::DoubleList DeltaFrame;
214  MC::DoubleList MmbsesDeltaFrame;
215 
216  // Put placeholder empty frames
217  if (i == 0)
218  {
219  DeltaFrames.push_back(LastDeltaFrame);
220  MmbsesDeltaFrames.push_back(LastMmbsesDeltaFrame);
221  continue;
222  }
223  if (i == MfccFrames.size()-1)
224  {
225  DeltaFrames.push_back(DeltaFrame);
226  MmbsesDeltaFrames.push_back(LastMmbsesDeltaFrame);
227  continue;
228  }
229 
230  for (unsigned int i1 = 0; i1 < MfccFrames[i].size(); ++i1)
231  {
232  double Numerator = 0;
233  double MmbsesNumerator = 0;
234  double Denominator = 0;
235 
236  for (unsigned int N = 1; N < 3; ++N)
237  {
238  int PreviousIndex = (i == 1 ? 0 : i-N);
239  int NextIndex = (i == MfccFrames.size()-2 ? MfccFrames.size()-1 : i+N);
240 
241  Numerator += N*(MfccFrames[PreviousIndex][i1]-MfccFrames[NextIndex][i1]);
242  MmbsesNumerator += N*(MmbsesFrames[PreviousIndex][i1]-MmbsesFrames[NextIndex][i1]);
243  Denominator += N*N;
244  }
245  DeltaFrame.push_back(Numerator / 2 / Denominator);
246  MmbsesDeltaFrame.push_back(MmbsesNumerator / 2 / Denominator);
247  }
248  DeltaFrames.push_back(DeltaFrame);
249  MmbsesDeltaFrames.push_back(MmbsesDeltaFrame);
250  }
251  for (unsigned int i = 0; i < DataWindows.size(); ++i)
252  {
253  MC::FloatList FinalVector;
254 
255  // Add MFCC components except the first to the feature vector
256  for (unsigned int i1 = 1; i1 < MfccFrames[i].size(); ++i1)
257  {
258  FinalVector.push_back((float)MfccFrames[i][i1]);
259  MA_ANALYZER_ADD_FEATURE_NAME_WITH_INDEX(Mfcc, i1)
260  }
261  if (!OnlyMfccCoefs)
262  {
263  // Add all MEL-MBSES components (including the first) to the feature vector
264  for (unsigned int i1 = 0; i1 < MmbsesFrames[i].size(); ++i1)
265  {
266  FinalVector.push_back((float)MmbsesFrames[i][i1]);
267  MA_ANALYZER_ADD_FEATURE_NAME_WITH_INDEX(Mmbses, i1)
268  }
269  // Add MFCC energies except the first to the feature vector
270  for (unsigned int i1 = 1; i1 < MfccEnergyFrames[i].size(); ++i1)
271  {
272  FinalVector.push_back((float)MfccEnergyFrames[i][i1]);
273  MA_ANALYZER_ADD_FEATURE_NAME_WITH_INDEX(MfccEnergy, i1)
274  }
275  }
276  int MfccDeltaNeutralSum = 0;
277  const float NeutralLimit = 0.2;
278  const int HistogramBins = 20;
279  const float HistogramLimit = 1.0;
280  MC::IntList Histogram(HistogramBins, 0);
281 
282  // Add delta MFCC components to the feature vector
283  for (unsigned int i1 = 0; i1 < DeltaFrames[i].size(); ++i1)
284  {
285  if (DeltaFrames[i][i1] > -NeutralLimit && DeltaFrames[i][i1] < NeutralLimit)
286  MfccDeltaNeutralSum++;
287 
288  if (DeltaFrames[i][i1] <= -HistogramLimit)
289  Histogram[0]++;
290  else
291  if (DeltaFrames[i][i1] >= HistogramLimit)
292  Histogram[HistogramBins-1]++;
293  else {
294  MANum<int> Index((int)((DeltaFrames[i][i1]+HistogramLimit)*(HistogramBins-1) / 2 / HistogramLimit), 0, HistogramBins-1);
295 
296  Histogram[(int)Index]++;
297  }
298  if (!OnlyMfccCoefs)
299  {
300  FinalVector.push_back((float)DeltaFrames[i][i1]);
301  MA_ANALYZER_ADD_FEATURE_NAME_WITH_INDEX(MfccDelta, i1)
302  }
303  }
304  FinalVector.push_back((float)MfccDeltaNeutralSum);
305  MA_ANALYZER_ADD_FEATURE_NAME("MfccDeltaNeutralSum")
306  if (!OnlyMfccCoefs && DeltaFrames[i].size() == 0)
307  {
308  for (unsigned int i1 = 0; i1 < DeltaFrames[1].size(); ++i1)
309  {
310  FinalVector.push_back(0);
311  MA_ANALYZER_ADD_FEATURE_NAME_WITH_INDEX(MfccDelta, i1)
312  }
313  }
314  for (int i1 = 0; i1 < HistogramBins; ++i1)
315  {
316  FinalVector.push_back((float)Histogram[i1]);
317  MA_ANALYZER_ADD_FEATURE_NAME_WITH_INDEX(MfccDeltaHistogram, i1)
318  }
319  // Add delta MEL-MBSES components to the feature vector
320  int MmbsesDeltaNeutralSum = 0;
321 
322  Histogram = MC::IntList(HistogramBins, 0);
323  for (unsigned int i1 = 0; i1 < MmbsesDeltaFrames[i].size(); ++i1)
324  {
325  if (MmbsesDeltaFrames[i][i1] > -NeutralLimit && MmbsesDeltaFrames[i][i1] < NeutralLimit)
326  MmbsesDeltaNeutralSum++;
327 
328  if (MmbsesDeltaFrames[i][i1] <= -HistogramLimit)
329  Histogram[0]++;
330  else
331  if (MmbsesDeltaFrames[i][i1] >= HistogramLimit)
332  Histogram[HistogramBins-1]++;
333  else {
334  MANum<int> Index((int)((MmbsesDeltaFrames[i][i1]+HistogramLimit)*(HistogramBins-1) / 2 / HistogramLimit), 0, HistogramBins-1);
335 
336  Histogram[(int)Index]++;
337  }
338  if (!OnlyMfccCoefs)
339  {
340  FinalVector.push_back((float)MmbsesDeltaFrames[i][i1]);
341  MA_ANALYZER_ADD_FEATURE_NAME_WITH_INDEX(MmbsesDelta, i1)
342  }
343  }
344  FinalVector.push_back((float)MmbsesDeltaNeutralSum);
345  MA_ANALYZER_ADD_FEATURE_NAME("MmbsesDeltaNeutralSum")
346  if (!OnlyMfccCoefs && MmbsesDeltaFrames[i].size() == 0)
347  {
348  for (unsigned int i1 = 0; i1 < MmbsesDeltaFrames[1].size(); ++i1)
349  {
350  FinalVector.push_back(0);
351  MA_ANALYZER_ADD_FEATURE_NAME_WITH_INDEX(MmbsesDelta, i1)
352  }
353  }
354  for (int i1 = 0; i1 < HistogramBins; ++i1)
355  {
356  FinalVector.push_back((float)Histogram[i1]);
357  MA_ANALYZER_ADD_FEATURE_NAME_WITH_INDEX(MbsesDeltaHistogram, i1)
358  }
359 
364 
365  FinalVector.push_back((float)Mean);
366  MA_ANALYZER_ADD_FEATURE_NAME("DataMean")
367  FinalVector.push_back((float)StDev);
368  MA_ANALYZER_ADD_FEATURE_NAME("DataStDev")
371 
372  FinalVector.push_back(TQ-FQ);
373  MA_ANALYZER_ADD_FEATURE_NAME("DataIqr")
374  FinalVector.push_back((float)Min);
375  MA_ANALYZER_ADD_FEATURE_NAME("DataMin")
376  FinalVector.push_back((float)Max);
377  MA_ANALYZER_ADD_FEATURE_NAME("DataMax")
378  FinalVector.push_back((float)Max-Min);
379  MA_ANALYZER_ADD_FEATURE_NAME("DataRange")
380  {
381  double Mean = MCCalculateVectorStatistic(MfccFrames[i], *new MCArithmeticMean<double>);
382  double Min = MCCalculateVectorStatistic(MfccFrames[i], *new MCMinimum<double>);
383  double Max = MCCalculateVectorStatistic(MfccFrames[i], *new MCMaximum<double>);
384  double StDev = MCCalculateVectorStatistic(MfccFrames[i], *new MCStandardDeviation<double>);
385 
386  FinalVector.push_back((float)Mean);
387  MA_ANALYZER_ADD_FEATURE_NAME("MfccMean")
388  FinalVector.push_back((float)StDev);
389  MA_ANALYZER_ADD_FEATURE_NAME("MfccStDev")
390  float TQ = (float)MCCalculateVectorStatistic(MfccFrames[i], *new MCFirstQuartile<double>);
391  float FQ = (float)MCCalculateVectorStatistic(MfccFrames[i], *new MCThirdQuartile<double>);
392 
393  FinalVector.push_back(TQ-FQ);
394  MA_ANALYZER_ADD_FEATURE_NAME("MfccIqr")
395  FinalVector.push_back((float)Min);
396  MA_ANALYZER_ADD_FEATURE_NAME("MfccMin")
397  FinalVector.push_back((float)Max);
398  MA_ANALYZER_ADD_FEATURE_NAME("MfccMax")
399  FinalVector.push_back((float)Max-Min);
400  MA_ANALYZER_ADD_FEATURE_NAME("MfccRange")
401  }
402  {
403  double UserData[4] = { 0.0f, 0.0f, 0.0f, 0.0f};
404  double BarkCoefficients[XTRACT_BARK_BANDS-1];
405  MC::DoubleList Amplitudes(SpectrumFrames[i]);
406  double SpectrumMean = 0;
407  double SpectrumStDev = 0;
408  double SpectrumGeometricMean = 0;
409  double Flatness = 0;
410  double FlatnessDb = 0;
411  double AverageDeviation = 0;
412  double CRest = 0;
413  double F0 = 0;
414  double Hps = 0;
415  double IrregularityJ = 0;
416  double IrregularityK = 0;
417  double Kurtosis = 0;
418  double Loudness = 0;
419  MC::DoubleList Peaks(WindowSize, 0);
420  MC::DoubleList Harmonics(WindowSize, 0);
421  MC::DoubleList PeaksFirstHalf(WindowSize / 2, 0);
422  MC::DoubleList HarmonicsFirstHalf(WindowSize / 2, 0);
423  double HarmonicsMean = 0;
424  double HarmonicsStDev = 0;
425  double PeaksMean = 0;
426  double PeaksStDev = 0;
427  int PartialsCount = 0;
428  int HarmonicsCount = 0;
429  double Noisiness = 0;
430  float NonZeroCount = 0;
431  double OddEvenRatio = 0;
432  double RmsAmplitude = 0;
433  double RollOff = 0;
434 // double Sharpness = 0;
435  double Skewness = 0;
436  double Smoothness = 0;
437  double SpectralCentroid = 0;
438  double SpectralInharmonicity = 0;
439  double SpectralKurtosis = 0;
440  double SpectralSkewness = 0;
441  double SpectralSlope = 0;
442  double SpectralVariance = 0;
443  double Sum = 0;
444  double Tonality = 0;
445  double Tristimulus1 = 0;
446  double Tristimulus2 = 0;
447  double Tristimulus3 = 0;
448  double Variance = 0;
449  double WaveletF0 = 0;
450  double ZeroCrossRate = 0;
451  MC::DoubleList Temp;
452 
453  xtract_bark_coefficients((double*)&(SpectrumFrames[i][0]), WindowSize / 2, &BarkBandLimits[0], &BarkCoefficients[0]);
454  // Note: This resize() cuts only the second half of the container to preserve only the attributes
455  Amplitudes.resize(WindowSize / 2);
456  SpectrumMean = MCCalculateVectorStatistic(Amplitudes, *new MCArithmeticMean<double>);
457  FinalVector.push_back((float)SpectrumMean);
458  MA_ANALYZER_ADD_FEATURE_NAME("SpectrumMean")
459  SpectrumGeometricMean = MCCalculateVectorStatistic(Amplitudes, *new MCGeometricMean<double>);
460  FinalVector.push_back((float)SpectrumGeometricMean);
461  MA_ANALYZER_ADD_FEATURE_NAME("SpectrumGeometricMean")
462  SpectrumStDev = MCCalculateVectorStatistic(Amplitudes, *new MCStandardDeviation<double>);
463  FinalVector.push_back((float)SpectrumStDev);
464  MA_ANALYZER_ADD_FEATURE_NAME("SpectrumStDev")
465 
466  Flatness = SpectrumGeometricMean / SpectrumMean;
467  FinalVector.push_back((float)Flatness);
468  MA_ANALYZER_ADD_FEATURE_NAME("SpectrumFlatness")
469  xtract_flatness_db(nullptr, 0, &Flatness, &FlatnessDb);
470  FinalVector.push_back((float)FlatnessDb);
471  MA_ANALYZER_ADD_FEATURE_NAME("SpectrumFlatnessDb")
472  xtract_average_deviation((double*)&(DataWindows[i][0]), WindowSize, &Mean, &AverageDeviation);
473  FinalVector.push_back((float)AverageDeviation);
474  MA_ANALYZER_ADD_FEATURE_NAME("DataAverageDeviation")
475  UserData[0] = Max;
476  UserData[1] = Mean;
477  xtract_crest(nullptr, 0, UserData, &CRest);
478  FinalVector.push_back((float)CRest);
479  MA_ANALYZER_ADD_FEATURE_NAME("DataAverageCRest")
480  xtract_failsafe_f0((double*)&(DataWindows[i][0]), WindowSize, &Frequency, &F0);
481  if (MCIsFloatInfinity((float)F0))
482  {
483  F0 = 0;
484  FinalVector.push_back((float)0);
485  } else
486  if (F0 > 48000.0)
487  {
488  F0 = 0;
489  FinalVector.push_back((float)-1);
490  } else {
491  FinalVector.push_back((float)F0);
492  }
493  MA_ANALYZER_ADD_FEATURE_NAME("DataF0")
494  xtract_hps((double*)&(SpectrumFrames[i][0]), WindowSize, nullptr, &Hps);
495  FinalVector.push_back((float)Hps);
496  MA_ANALYZER_ADD_FEATURE_NAME("SpectrumHps")
497  xtract_irregularity_j((double*)&(SpectrumFrames[i][0]), WindowSize, nullptr, &IrregularityJ);
498  FinalVector.push_back((float)IrregularityJ);
499  MA_ANALYZER_ADD_FEATURE_NAME("SpectrumIrregularityJ")
500  xtract_irregularity_k((double*)&(SpectrumFrames[i][0]), WindowSize, nullptr, &IrregularityK);
501  FinalVector.push_back((float)IrregularityK);
502  MA_ANALYZER_ADD_FEATURE_NAME("SpectrumIrregularityK")
503  UserData[0] = Mean;
504  UserData[1] = StDev;
505  xtract_kurtosis((double*)&(DataWindows[i][0]), WindowSize, UserData, &Kurtosis);
506  FinalVector.push_back((float)Kurtosis);
507  MA_ANALYZER_ADD_FEATURE_NAME("DataKurtosis")
508  xtract_loudness(&BarkCoefficients[0], XTRACT_BARK_BANDS-1, nullptr, &Loudness);
509  FinalVector.push_back((float)Loudness);
510  MA_ANALYZER_ADD_FEATURE_NAME("DataLoudness")
511  UserData[0] = (double)Frequency / WindowSize;
512  // TODO: Is this 10% of peak threshold good?
513  UserData[1] = 10.0;
514  xtract_peak_spectrum((double*)&(SpectrumFrames[i][0]), WindowSize / 2, UserData, (double*)&(Peaks[0]));
515  memcpy(&PeaksFirstHalf[0], &Peaks[0], WindowSize / 2*sizeof(double));
516  PeaksMean = MCCalculateVectorStatistic(PeaksFirstHalf, *new MCArithmeticMean<double>);
517  FinalVector.push_back((float)PeaksMean);
518  MA_ANALYZER_ADD_FEATURE_NAME("PeaksMean")
519  PeaksStDev = MCCalculateVectorStatistic(PeaksFirstHalf, *new MCStandardDeviation<double>);
520  FinalVector.push_back((float)PeaksStDev);
521  MA_ANALYZER_ADD_FEATURE_NAME("PeaksStDev")
522 
523  UserData[0] = F0;
524  // TODO: Is this threshold good?
525  UserData[1] = 0.2;
526  xtract_harmonic_spectrum((double*)&(Peaks[0]), WindowSize, UserData, (double*)&(Harmonics[0]));
527  memcpy(&HarmonicsFirstHalf[0], &Harmonics[0], WindowSize / 2*sizeof(double));
528  HarmonicsMean = MCCalculateVectorStatistic(HarmonicsFirstHalf, *new MCArithmeticMean<double>);
529  FinalVector.push_back((float)HarmonicsMean);
530  MA_ANALYZER_ADD_FEATURE_NAME("HarmonicsMean")
531  HarmonicsStDev = MCCalculateVectorStatistic(HarmonicsFirstHalf, *new MCStandardDeviation<double>);
532  FinalVector.push_back((float)HarmonicsStDev);
533  MA_ANALYZER_ADD_FEATURE_NAME("HarmonicsStDev")
534  HarmonicsCount = (int)MCCalculateVectorStatistic(Harmonics, *new MCNonZeroCount<double>);
535  PartialsCount = (int)MCCalculateVectorStatistic(Peaks, *new MCNonZeroCount<double>);
536  FinalVector.push_back((float)HarmonicsCount);
537  MA_ANALYZER_ADD_FEATURE_NAME("HarmonicsCount")
538  FinalVector.push_back((float)PartialsCount);
539  MA_ANALYZER_ADD_FEATURE_NAME("PartialsCount")
540  UserData[0] = (double)HarmonicsCount;
541  UserData[1] = (double)PartialsCount;
542  xtract_noisiness(nullptr, 0, UserData, &Noisiness);
543  FinalVector.push_back((float)Noisiness);
544  MA_ANALYZER_ADD_FEATURE_NAME("Noisiness")
546  FinalVector.push_back(NonZeroCount);
547  MA_ANALYZER_ADD_FEATURE_NAME("DataNonZeroCount")
548  xtract_odd_even_ratio((double*)&(Harmonics[0]), WindowSize, &F0, &OddEvenRatio);
549  FinalVector.push_back((float)OddEvenRatio);
550  MA_ANALYZER_ADD_FEATURE_NAME("OddEvenRatio")
551  xtract_rms_amplitude((double*)&(DataWindows[i][0]), WindowSize, nullptr, &RmsAmplitude);
552  FinalVector.push_back((float)RmsAmplitude);
553  MA_ANALYZER_ADD_FEATURE_NAME("RmsAmplitude")
554  UserData[0] = (double)Frequency / WindowSize;
555  // TODO: Is this threshold good?
556  UserData[1] = 0.2;
557  xtract_rolloff((double*)&(SpectrumFrames[i][0]), WindowSize / 2, UserData, &RollOff);
558  FinalVector.push_back((float)RollOff);
559  MA_ANALYZER_ADD_FEATURE_NAME("RollOff")
560  // SHARPNESS IS WRONG
561 // xtract_sharpness((double*)&(SpectrumFrames[i][0]), WindowSize / 2, NULL, &Sharpness);
562 // FinalVector.push_back((float)Sharpness);
563 // MA_ANALYZER_ADD_FEATURE_NAME("Sharpness")
564  UserData[0] = Mean;
565  UserData[1] = StDev;
566  xtract_skewness((double*)&(DataWindows[i][0]), WindowSize, UserData, &Skewness);
567  FinalVector.push_back((float)Skewness);
568  MA_ANALYZER_ADD_FEATURE_NAME("DataSkewness")
569  xtract_smoothness((double*)&(SpectrumFrames[i][0]), WindowSize / 2, nullptr, &Smoothness);
570  FinalVector.push_back((float)Smoothness);
571  MA_ANALYZER_ADD_FEATURE_NAME("Smoothness")
572  xtract_spectral_centroid((double*)&(SpectrumFrames[i][0]), WindowSize, nullptr, &SpectralCentroid);
573  FinalVector.push_back((float)SpectralCentroid);
574  MA_ANALYZER_ADD_FEATURE_NAME("SpectralCentroid")
575  xtract_spectral_centroid((double*)&(Peaks[0]), WindowSize, nullptr, &SpectralCentroid);
576  FinalVector.push_back((float)SpectralCentroid);
577  MA_ANALYZER_ADD_FEATURE_NAME("PeakCentroid")
578  xtract_spectral_centroid((double*)&(Harmonics[0]), WindowSize, nullptr, &SpectralCentroid);
579  FinalVector.push_back((float)SpectralCentroid);
580  MA_ANALYZER_ADD_FEATURE_NAME("HarmonicCentroid")
581  xtract_spectral_inharmonicity((double*)&(Peaks[0]), WindowSize / 2, &F0, &SpectralInharmonicity);
582  FinalVector.push_back((float)SpectralInharmonicity);
583  MA_ANALYZER_ADD_FEATURE_NAME("SpectralInharmonicity")
584  UserData[0] = SpectrumMean;
585  UserData[1] = SpectrumStDev;
586  xtract_spectral_kurtosis((double*)&(SpectrumFrames[i][0]), WindowSize, UserData, &SpectralKurtosis);
587  if (SpectralKurtosis > 1000000)
588  {
589  FinalVector.push_back((float)0);
590  } else {
591  FinalVector.push_back((float)SpectralKurtosis);
592  }
593  MA_ANALYZER_ADD_FEATURE_NAME("SpectralKurtosis")
594  UserData[0] = PeaksMean;
595  UserData[1] = PeaksStDev;
596  xtract_spectral_kurtosis((double*)&(Peaks[0]), WindowSize, UserData, &SpectralKurtosis);
597  if (SpectralKurtosis > 1000000)
598  {
599  FinalVector.push_back((float)0);
600  } else {
601  FinalVector.push_back((float)SpectralKurtosis);
602  }
603  MA_ANALYZER_ADD_FEATURE_NAME("PeakKurtosis")
604  UserData[0] = HarmonicsMean;
605  UserData[1] = HarmonicsStDev;
606  xtract_spectral_kurtosis((double*)&(Harmonics[0]), WindowSize, UserData, &SpectralKurtosis);
607  if (SpectralKurtosis > 1000000)
608  {
609  FinalVector.push_back((float)0);
610  } else {
611  FinalVector.push_back((float)SpectralKurtosis);
612  }
613  MA_ANALYZER_ADD_FEATURE_NAME("HarmonicKurtosis")
614  UserData[0] = SpectrumMean;
615  UserData[1] = SpectrumStDev;
616  xtract_spectral_skewness((double*)&(SpectrumFrames[i][0]), WindowSize, UserData, &SpectralSkewness);
617  if (SpectralSkewness > 10000000)
618  {
619  FinalVector.push_back((float)0);
620  } else {
621  FinalVector.push_back((float)SpectralSkewness);
622  }
623  MA_ANALYZER_ADD_FEATURE_NAME("SpectrumSkewness")
624  xtract_spectral_skewness((double*)&(Peaks[0]), WindowSize, UserData, &SpectralSkewness);
625  if (SpectralSkewness > 10000000)
626  {
627  FinalVector.push_back((float)0);
628  } else {
629  FinalVector.push_back((float)SpectralSkewness); // F96
630  }
631  MA_ANALYZER_ADD_FEATURE_NAME("PeakSkewness")
632  UserData[0] = HarmonicsMean;
633  UserData[1] = HarmonicsStDev;
634  xtract_spectral_skewness((double*)&(Harmonics[0]), WindowSize, UserData, &SpectralSkewness);
635  if (SpectralSkewness > 10000000)
636  {
637  FinalVector.push_back((float)0);
638  } else {
639  FinalVector.push_back((float)SpectralSkewness);
640  }
641  MA_ANALYZER_ADD_FEATURE_NAME("HarmonicSkewness")
642  xtract_spectral_slope((double*)&(SpectrumFrames[i][0]), WindowSize, nullptr, &SpectralSlope);
643  FinalVector.push_back((float)SpectralSlope);
644  MA_ANALYZER_ADD_FEATURE_NAME("SpectralSlope")
645  xtract_spectral_slope((double*)&(Peaks[0]), WindowSize, nullptr, &SpectralSlope);
646  FinalVector.push_back((float)SpectralSlope);
647  MA_ANALYZER_ADD_FEATURE_NAME("PeakSlope")
648  xtract_spectral_slope((double*)&(Harmonics[0]), WindowSize, nullptr, &SpectralSlope);
649  FinalVector.push_back((float)SpectralSlope); // F100
650  MA_ANALYZER_ADD_FEATURE_NAME("HarmonicSlope")
651  UserData[0] = SpectrumMean;
652  xtract_spectral_variance((double*)&(SpectrumFrames[i][0]), WindowSize, UserData, &SpectralVariance);
653  FinalVector.push_back((float)SpectralVariance);
654  MA_ANALYZER_ADD_FEATURE_NAME("SpectralVariance")
655  UserData[0] = PeaksMean;
656  xtract_spectral_variance((double*)&(Peaks[0]), WindowSize, UserData, &SpectralVariance);
657  FinalVector.push_back((float)SpectralVariance);
658  MA_ANALYZER_ADD_FEATURE_NAME("PeakVariance")
659  UserData[0] = HarmonicsMean;
660  xtract_spectral_variance((double*)&(Harmonics[0]), WindowSize, UserData, &SpectralVariance);
661  FinalVector.push_back((float)SpectralVariance);
662  MA_ANALYZER_ADD_FEATURE_NAME("HarmonicVariance")
663  xtract_sum((double*)&(DataWindows[i][0]), WindowSize, nullptr, &Sum);
664  FinalVector.push_back((float)Sum);
665  MA_ANALYZER_ADD_FEATURE_NAME("DataSum")
666  xtract_tonality(nullptr, 0, &FlatnessDb, &Tonality);
667  FinalVector.push_back((float)Tonality);
668  MA_ANALYZER_ADD_FEATURE_NAME("Tonality")
669  xtract_tristimulus_1((double*)&(Harmonics[0]), WindowSize, &F0, &Tristimulus1);
670  FinalVector.push_back((float)Tristimulus1);
671  MA_ANALYZER_ADD_FEATURE_NAME("Tristimulus1")
672  xtract_tristimulus_2((double*)&(Harmonics[0]), WindowSize, &F0, &Tristimulus2);
673  FinalVector.push_back((float)Tristimulus2);
674  MA_ANALYZER_ADD_FEATURE_NAME("Tristimulus2")
675  xtract_tristimulus_3((double*)&(Harmonics[0]), WindowSize, &F0, &Tristimulus3);
676  FinalVector.push_back((float)Tristimulus3);
677  MA_ANALYZER_ADD_FEATURE_NAME("Tristimulus3")
678  xtract_variance((double*)&(DataWindows[i][0]), WindowSize, &Mean, &Variance);
679  FinalVector.push_back((float)Variance);
680  MA_ANALYZER_ADD_FEATURE_NAME("DataVariance")
681  xtract_wavelet_f0((double*)&(DataWindows[i][0]), WindowSize, &Frequency, &WaveletF0);
682  FinalVector.push_back((float)WaveletF0);
683  MA_ANALYZER_ADD_FEATURE_NAME("WaveletF0")
684  xtract_zcr((double*)&(DataWindows[i][0]), WindowSize, nullptr, &ZeroCrossRate);
685  FinalVector.push_back((float)ZeroCrossRate);
686  MA_ANALYZER_ADD_FEATURE_NAME("ZeroCrossRate")
687  Temp.resize(Wrapper->FilterBank->n_filters);
688  xtract_spectral_subband_centroids((double*)&(Peaks[0]), WindowSize / 2, Wrapper->FilterBank, (double*)&Temp[0]);
689  if (!OnlyMfccCoefs)
690  {
691  for (unsigned int i1 = 2; i1 < Temp.size(); ++i1)
692  {
693  FinalVector.push_back((float) Temp[i1]);
694  MA_ANALYZER_ADD_FEATURE_NAME_WITH_INDEX(PeakBandCentroid, i1)
695  }
696  }
697  xtract_spectral_subband_centroids((double*)&(SpectrumFrames[i][0]), WindowSize / 2,
698  Wrapper->FilterBank, (double*)&Temp[0]);
699  NonZeroCount = MCCalculateVectorStatistic(Temp, *new MCNonZeroCount<double>);
700  FinalVector.push_back(NonZeroCount);
701  MA_ANALYZER_ADD_FEATURE_NAME("SpectrumCentroidsNonZeroCount")
702  if (!OnlyMfccCoefs)
703  {
704  for (unsigned int i1 = 0; i1 < Temp.size(); ++i1)
705  {
706  FinalVector.push_back((float)Temp[i1]);
707  MA_ANALYZER_ADD_FEATURE_NAME_WITH_INDEX(SpectrumBandCentroid, i1)
708  }
709  }
710  xtract_spectral_subband_centroids((double*)&(Harmonics[0]), WindowSize / 2,
711  Wrapper->FilterBank, (double*)&Temp[0]);
712  NonZeroCount = MCCalculateVectorStatistic(Temp, *new MCNonZeroCount<double>);
713  FinalVector.push_back(NonZeroCount);
714  MA_ANALYZER_ADD_FEATURE_NAME("HarmonicCentroidsNonZeroCount")
715  if (!OnlyMfccCoefs)
716  {
717  for (unsigned int i1 = 0; i1 < Temp.size(); ++i1)
718  {
719  FinalVector.push_back((float) Temp[i1]);
720  MA_ANALYZER_ADD_FEATURE_NAME_WITH_INDEX(HarmonicBandCentroid, i1)
721  }
722  }
723  }
724  FinalVector = ApplyHorizontalFeatures(FinalVector);
725  MA_ANALYZER_FEATURE_NAMES_COMMIT(FinalVector);
726  FeatureVectors.push_back(FinalVector);
727  // Generate the horizontal features when the sliding window is full
728  if (SlidingWindowLimit > 0 && (int)SlidingWindow.size() == SlidingWindowLimit-1)
729  {
730  for (unsigned int i1 = 0; i1 < FeatureVectors.size(); ++i1)
732  }
733  }
734  if (!DeltaFrames.empty())
735  LastDeltaFrame = DeltaFrames.back();
736  if (!MmbsesDeltaFrames.empty())
737  LastMmbsesDeltaFrame = MmbsesDeltaFrames.back();
738 #endif
739 }
740 
741 
// Add a feature vector to the sliding window.
//
// vector is appended to SlidingWindow. If the window then holds more than SlidingWindowLimit
// entries, its oldest entry is removed; if it holds fewer than SlidingWindowLimit entries,
// vector is returned unchanged. In the remaining cases a copy of vector is returned.
//
// NOTE(review): listing line 755, between the copy and the return, is missing from this view -
// presumably the place where the horizontal features are attached to the copy; confirm against
// the full file.
742 MC::FloatList MASoundBasicAnalyzer::ApplyHorizontalFeatures(const MC::FloatList& vector)
743 {
744  SlidingWindow.push_back(vector);
 // Keep at most SlidingWindowLimit entries by dropping the oldest one
745  if ((int)SlidingWindow.size() > SlidingWindowLimit)
746  {
747  SlidingWindow.erase(SlidingWindow.begin());
748  } else
 // Window not full yet: hand the input back as it is
749  if ((int)SlidingWindow.size() < SlidingWindowLimit)
750  {
751  return vector;
752  }
753  MC::FloatList NewVector = vector;
754 
756  return NewVector;
757 }
758 
759 
761 {
762  return MC::FloatList();
763 }
764 
765 
766 MC::DoubleTable MASoundBasicAnalyzer::GenerateHannWindows(const MC::DoubleList& audio_data, int& remaining_count)
767 {
768  MC::DoubleTable Windows;
769 
770  // Create the windows
771  for (int i = 0; i <= (int)audio_data.size()-WindowSize; i += WindowSize / 3)
772  {
773  MC::DoubleList Window;
774 
775  Window.resize(WindowSize);
776  xtract_windowed((double*)&audio_data[i], WindowSize, HannWindow, (double*)&Window[0]);
777  Windows.push_back(Window);
778  remaining_count = (int)audio_data.size()-i-WindowSize+1;
779  }
780  return Windows;
781 }
virtual bool IsValid() const override
Check if enough samples were added to the analyzer.
Non-zero sample count statistic.
int SlidingWindowLimit
Sliding window count.
Minimum statistic.
bool OnlyMfccCoefs
Calculate only mfcc band components.
Binary data class.
const T MCMin(const U &container)
Get the minimal value of a container.
Geometric mean statistic.
int * BarkBandLimits
Bark band limits.
MC::FloatTable GetFeatureVectors(unsigned int desired_vector_count=0, unsigned int erased_vector_count=0)
Get all feature vectors.
Analyzer base class.
Definition: MAAnalyzer.hpp:67
boost::scoped_ptr< FilterBankWrapper > Wrapper
Filter bank for MFCC calculation.
float MCCalculateVectorStatistic(const std::vector< T > &vector, MCSampleStatistic< T > &statistic)
Calculate a statistic over a vector.
unsigned int GetFeatureVectorCount() const
Get the available feature vector count.
Handling sound data.
Definition: MASoundData.hpp:38
static const int MfccCount
Number of mfcc components.
MC::DoubleList LastDeltaFrame
Last delta frame.
static MC::DoubleList ConvertToDouble(const MCBinaryData &raw_data)
Convert audio data to a double list.
virtual void Reset() override
Reset the analyzer and drop all samples.
virtual MC::FloatList CalculateHorizontalFeatures()
Calculate horizontal features for the feature vectors.
MC::FloatTable FeatureVectors
Feature vectors.
First quartile statistic.
double Frequency
Audio frequency.
MASoundBasicAnalyzer(unsigned int frequency, bool only_mfcc_components=false)
Class constructor.
bool MCIsFloatInfinity(const float value)
Check a value for float infinity.
Definition: MCDefs.cpp:122
MC::DoubleList LastMmbsesDeltaFrame
Last MMBSES frame.
virtual void AddSamples(const MARobotState &robot_state) override
Add new samples from the robot state.
MC::FloatList ApplyHorizontalFeatures(const MC::FloatList &vector)
Add a feature vector to a sliding window to calculate horizontal features.
MC::DoubleTable GenerateHannWindows(const MC::DoubleList &audio_data, int &remaining_count)
Generate Hann windows from audio data.
MC::DoubleList Buffer
Audio buffer.
double * HannWindow
Hann window.
Third quartile statistic.
MC::FloatTable SlidingWindow
Sliding window of feature vectors.
void MCMergeContainers(U &target, const U &other)
Merge two containers.
virtual MC::FloatList GetFeatureVector() override
Get a feature vector.
Maximum statistic.
Standard deviation statistic.
virtual void GenerateFeatureVectors()
Generate feature vector frames.
int WindowSize
Ideal window size.
void AddSoundData(const MCBinaryData &raw_data)
Add raw (byte) sound data.
Robot state.
MC::DoubleTable DataWindows
Audio data windows.
Arithmetic mean statistic.