MAClassifier.cpp
/*
 * This file is part of the AiBO+ project
 *
 * Copyright (C) 2005-2016 Csaba Kertész (csaba.kertesz@gmail.com)
 *
 * AiBO+ is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * AiBO+ is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */
21 
22 #include "MAClassifier.hpp"
23 
24 #include "MAClassifierPrivate.hpp"
25 #include "MAModel.hpp"
26 
27 #include <3rdparty/archives/portable_iarchive.hpp>
28 #include <3rdparty/archives/portable_oarchive.hpp>
29 #include <MCBinaryData.hpp>
30 #include <MCContainers.hpp>
31 #include <MCDataContainer.hpp>
32 #include <MCLog.hpp>
33 #include <MCSampleStatistics.hpp>
34 
35 #include <opencv/cv.h>
36 #include <opencv2/core/core.hpp>
37 
38 #if __cplusplus >= 201103L && defined(__clang__)
39 #pragma GCC diagnostic push
40 #pragma GCC diagnostic ignored "-Wdeprecated-register"
41 #endif
42 #include <Eigen/Dense>
43 #if __cplusplus >= 201103L && defined(__clang__)
44 #pragma GCC diagnostic pop
45 #endif
46 #include <serialize.h>
47 #include <maxent.h>
48 #include <lwpr.hh>
49 #if defined(__unix__)
50 #include <gp.h>
51 #include <gp_utils.h>
52 #endif
53 #include <pls.h>
54 
55 #include <boost/archive/binary_oarchive.hpp>
56 #include <boost/archive/binary_iarchive.hpp>
57 #include <boost/iostreams/device/back_inserter.hpp>
58 #include <boost/iostreams/stream.hpp>
59 #include <boost/serialization/vector.hpp>
60 
namespace
{
class InitOpenCVRNG
{
public:
  InitOpenCVRNG()
  {
    Reset();
  }

  void Reset()
  {
    // Note: Need to reset the random seed before doing anything with OpenCV's ML module otherwise
    // inconsistent results are calculated (at least with train_auto() in SVM)
    cv::theRNG().state = MCRand(0, 10000000);
  }
};

static InitOpenCVRNG InitOpenCVRNGWrapper;
}

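// GpWrapper hides the libgp Gaussian process object behind a small wrapper class so that
// non-unix builds (where libgp is not available) still get an empty placeholder type.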
#if defined(__unix__)
class GpWrapper
{
public:
  boost::scoped_ptr<libgp::GaussianProcess> GprScPtr;
};
#else
class GpWrapper
{
};
#endif


MAClassifier::MAClassifier() : Classifier(MA::DecisionTreeClassifier),
  Preprocessing(MA::FeatureStandardization), Regression(false), Trained(false), FeatureCount(0),
  LabelCount(1), SvmGamma(0.00001), SvmC(0.1), SvmP(0.01), SvmNu(0.0001), SvmAutoTrain(false),
  DlibEpsilon(0.00001), RvmSigmoidGamma(0.00001), RvmRbfGamma(0.0002),
  KrrGamma(1.5), KrrLambda(0.01), MeL1(0.00001), MeL2(0.005), LwprAlpha(0.2), PlsrComponents(0),
  KrlsTolerance(0.001), KrlsGamma(0.0002), TreeMaxDepth(20), TreeNodeSampleLimit(1),
  RtMaxForestSize(30)
{
}


MAClassifier::MAClassifier(MA::CRMethodType method, int label_count, bool regression) :
  Classifier(method), Preprocessing(MA::FeatureStandardization), Regression(regression),
  Trained(false), FeatureCount(0), LabelCount(label_count <= 0 ? 1 : label_count),
  SvmGamma(0.00001), SvmC(0.1), SvmP(1), SvmNu(0.0001), SvmAutoTrain(false),
  DlibEpsilon(0.00001), RvmSigmoidGamma(0.00001), RvmRbfGamma(0.0002), KrrGamma(1.5),
  KrrLambda(0.01), MeL1(0.00001), MeL2(0.005), LwprAlpha(0.2), PlsrComponents(0),
  KrlsTolerance(0.001), KrlsGamma(0.0002), TreeMaxDepth(20), TreeNodeSampleLimit(1),
  RtMaxForestSize(30)
{
}


MAClassifier::~MAClassifier()
{
}


// (signature lost from the listing; reconstructed name, original body)
bool MAClassifier::HasSamples() const
{
  return !CachedSamples.empty();
}


MA::CRMethodType MAClassifier::GetMethodType() const
{
  return Classifier;
}


// (signature lost from the listing; reconstructed name, original body)
bool MAClassifier::GetRegressionMode() const
{
  return Regression;
}


// (signature lost from the listing; reconstructed name, original body)
unsigned int MAClassifier::GetFeatureCount() const
{
  return CachedSamples.size() == 0 ? 0 : CachedSamples[0].size();
}


// (signature lost from the listing; reconstructed by analogy with GetLabels() below)
MC::FloatTable& MAClassifier::GetSamples()
{
  return CachedSamples;
}


MC::FloatList& MAClassifier::GetLabels()
{
  return CachedLabels;
}


MC::FloatList MAClassifier::GetModelLabels() const
{
  MC::FloatList Vector(CachedUniqueLabels.begin(), CachedUniqueLabels.end());

  return Vector;
}


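// Classes registered here get a slightly higher prior weight than the rest when the OpenCV
// tree-based classifiers and the EM classifier are trained (0.15 vs. 0.1, see Train()).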
void MAClassifier::PrioritizeClasses(const MC::FloatList& labels)
{
  PrioritizedClasses = labels;
}


void MAClassifier::Reset()
{
  PreprocessedData.clear();
  CachedSamples.clear();
  CachedLabels.clear();
  CachedUniqueLabels.clear();
  if (Classifier == MA::NeuralNetworkClassifier)
  {
    NeuralNetwork->clear();
  } else
  if (Classifier == MA::NaiveBayesClassifier)
  {
    BayesClassifier->clear();
  } else
  if (Classifier == MA::KNearestNeighborClassifier)
  {
    KNearestClassifier->clear();
  } else
  if (Classifier >= MA::SvmClassifierCLinear && Classifier <= MA::SvmClassifierNuRbf)
  {
    SvmClassifier->clear();
    // (three lines were lost from the listing here; restored by the pattern of SetParameter())
    SvmClassifierParams->C = SvmC;
    SvmClassifierParams->p = SvmP;
    SvmClassifierParams->nu = SvmNu;
    SvmClassifierParams->gamma = SvmGamma;
  } else
  if (Classifier == MA::DecisionTreeClassifier)
  {
    DecisionTree->clear();
  } else
  if (Classifier == MA::RandomTreesClassifier)
  {
    RandomTrees->clear();
  } else
  if (Classifier == MA::ExtremeRandomTreesClassifier)
  {
    ExtremeRandomTrees->clear();
  } else
  if (Classifier == MA::GradientBoostedTreesClassifier)
  {
    GradientBoostedTrees->clear();
  } else
  if (Classifier == MA::ExpectationMaximization)
  {
    EmClassifier->Classifier->clear();
  } else
  if ((Classifier >= MA::SvmClassifierCLinearEkm && Classifier <= MA::SvmClassifierCLinearDlib) ||
      (Classifier >= MA::RvmClassifierLinear && Classifier <= MA::KrRegressionRbf) ||
      (Classifier >= MA::KrlsLinearRegression && Classifier <= MA::KrlsRbfRegression))
  {
    DlibFunctions.reset(nullptr);
  } else
  if (Classifier == MA::MaxEntropyClassifierL1 || Classifier == MA::MaxEntropyClassifierL2)
  {
    MaxEntropy.reset(nullptr);
  } else
  if (Classifier == MA::LwpRegression)
  {
    Lwpr.reset(nullptr);
  } else
#if defined(__unix__)
  if (Classifier == MA::GaussianProcessRegression)
  {
    Gpr->GprScPtr.reset(nullptr);
  } else
#endif
  if (Classifier == MA::PlsRegression)
  {
    Plsr.reset(nullptr);
    PlsrComponents = 0;
  }
  Trained = false;
  FeatureCount = 0;
}


void MAClassifier::SetParameter(MA::CRMethodParamType method_parameter, float value)
{
  if (Trained)
  {
    MC_WARNING("Custom classifier parameter can't be set after training.");
    return;
  }
  if (method_parameter == MA::SvmGammaParam)
  {
    SvmGamma = value;
    if (SvmClassifierParams.get())
      SvmClassifierParams->gamma = SvmGamma;
  } else
  if (method_parameter == MA::SvmCParam)
  {
    SvmC = value;
    if (SvmClassifierParams.get())
      SvmClassifierParams->C = SvmC;
  } else
  if (method_parameter == MA::SvmPParam)
  {
    SvmP = value;
    if (SvmClassifierParams.get())
      SvmClassifierParams->p = SvmP;
  } else
  if (method_parameter == MA::SvmNuParam)
  {
    SvmNu = value;
    if (SvmClassifierParams.get())
      SvmClassifierParams->nu = SvmNu;
  } else
  if (method_parameter == MA::SvmAutoTrainParam)
  {
    SvmAutoTrain = (bool)value;
  } else
  if (method_parameter == MA::DlibEpsilonParam)
  {
    DlibEpsilon = value;
  } else
  if (method_parameter == MA::RvmSigmoidGammaParam)
  {
    RvmSigmoidGamma = value;
  } else
  if (method_parameter == MA::RvmRbfGammaParam)
  {
    RvmRbfGamma = value;
  } else
  if (method_parameter == MA::KrrRbfGammaParam)
  {
    KrrGamma = value;
  } else
  if (method_parameter == MA::KrrLambdaParam)
  {
    KrrLambda = value;
  } else
  if (method_parameter == MA::MeL1Param)
  {
    MeL1 = value;
  } else
  if (method_parameter == MA::MeL2Param)
  {
    MeL2 = value;
  } else
  if (method_parameter == MA::LwprAlphaParam)
  {
    LwprAlpha = value;
  } else
  if (method_parameter == MA::PlsrComponentsParam)
  {
    PlsrComponents = (int)value;
    if (PlsrComponents < 0)
      PlsrComponents = 0;
  } else
  if (method_parameter == MA::KrlsToleranceParam)
  {
    KrlsTolerance = value;
  } else
  if (method_parameter == MA::KrlsGammaParam)
  {
    KrlsGamma = value;
  } else
  if (method_parameter == MA::TreeMaxDepthParam)
  {
    TreeMaxDepth = (int)value;
    if (TreeMaxDepth < 0)
      TreeMaxDepth = 20;
  } else
  if (method_parameter == MA::TreeNodeSampleLimitParam)
  {
    TreeNodeSampleLimit = (int)value;
    if (TreeNodeSampleLimit < 0)
      TreeNodeSampleLimit = 1;
  } else
  if (method_parameter == MA::RtMaxForestSizeParam)
  {
    RtMaxForestSize = (int)value;
    if (RtMaxForestSize < 0)
      RtMaxForestSize = 20;
  }
}


float MAClassifier::GetParameter(MA::CRMethodParamType method_parameter)
{
  if (method_parameter == MA::SvmGammaParam)
    return SvmGamma;
  else
  if (method_parameter == MA::SvmCParam)
    return SvmC;
  else
  if (method_parameter == MA::SvmPParam)
    return SvmP;
  else
  if (method_parameter == MA::SvmNuParam)
    return SvmNu;
  else
  if (method_parameter == MA::SvmAutoTrainParam)
    return (float)SvmAutoTrain;
  else
  if (method_parameter == MA::DlibEpsilonParam)
    return DlibEpsilon;
  else
  if (method_parameter == MA::RvmSigmoidGammaParam)
    return RvmSigmoidGamma;
  else
  if (method_parameter == MA::RvmRbfGammaParam)
    return RvmRbfGamma;
  else
  if (method_parameter == MA::KrrRbfGammaParam)
    return KrrGamma;
  else
  if (method_parameter == MA::KrrLambdaParam)
    return KrrLambda;
  else
  if (method_parameter == MA::MeL1Param)
    return MeL1;
  else
  if (method_parameter == MA::MeL2Param)
    return MeL2;
  else
  if (method_parameter == MA::LwprAlphaParam)
    return LwprAlpha;
  else
  if (method_parameter == MA::PlsrComponentsParam)
    return (float)PlsrComponents;
  else
  if (method_parameter == MA::KrlsToleranceParam)
    return KrlsTolerance;
  else
  if (method_parameter == MA::KrlsGammaParam)
    return KrlsGamma;
  else
  if (method_parameter == MA::TreeMaxDepthParam)
    return (float)TreeMaxDepth;
  else
  if (method_parameter == MA::TreeNodeSampleLimitParam)
    return (float)TreeNodeSampleLimit;
  else
  if (method_parameter == MA::RtMaxForestSizeParam)
    return (float)RtMaxForestSize;

  return MCFloatInfinity();
}


void MAClassifier::SetPreprocessingMode(MA::FeaturePreprocessingType preprocessing_mode)
{
  if (Trained)
  {
    MC_WARNING("Preprocessing mode can't be changed after training.");
    return;
  }
  Preprocessing = preprocessing_mode;
}


void MAClassifier::AddSamples(const MC::FloatTable& input_vectors, const MC::FloatList& labels)
{
  if (input_vectors.empty() || input_vectors[0].size() == 0)
  {
    MC_WARNING("No data in the input table for classifier training.");
    return;
  }
  // Check if the table is appropriate for training
  int RowSize = input_vectors[0].size();

  for (unsigned int i = 1; i < input_vectors.size(); ++i)
  {
    if ((int)input_vectors[i].size() != RowSize)
    {
      MC_WARNING("Row sizes are inconsistent in the input table for classifier training.");
      return;
    }
    if (FeatureCount != 0 && input_vectors[i].size() != FeatureCount)
    {
      MC_WARNING("Feature count does not match with the expected training vector size (%d != %d).",
                 (int)FeatureCount, (int)input_vectors[i].size());
      return;
    }
  }
  if (input_vectors.size() != labels.size())
  {
    MC_WARNING("Label counts are inconsistent with the input table size for classifier training.");
    return;
  }
  // Remember the feature count
  if (unlikely(FeatureCount == 0))
  {
    FeatureCount = RowSize;
  }
  MCMergeContainers(CachedSamples, input_vectors);
  // (line lost from the listing; restored: the labels must be cached alongside the samples)
  MCMergeContainers(CachedLabels, labels);
  CachedUniqueLabels.insert(labels.begin(), labels.end());
  Trained = false;
  if ((Classifier >= MA::SvmClassifierCLinearEkm && Classifier <= MA::SvmClassifierCLinearDlib) ||
      (Classifier >= MA::RvmClassifierLinear && Classifier <= MA::KrRegressionRbf) ||
      (Classifier >= MA::KrlsLinearRegression && Classifier <= MA::KrlsRbfRegression))
  {
    DlibFunctions.reset(nullptr);
    return;
  } else
  if (Classifier == MA::MaxEntropyClassifierL1 || Classifier == MA::MaxEntropyClassifierL2)
  {
    MaxEntropy.reset(nullptr);
    return;
  } else
  if (Classifier == MA::LwpRegression)
  {
    Lwpr.reset(nullptr);
    return;
  } else
#if defined(__unix__)
  if (Classifier == MA::GaussianProcessRegression)
  {
    Gpr->GprScPtr.reset(nullptr);
    return;
  } else
#endif
  if (Classifier == MA::PlsRegression)
  {
    Plsr.reset(nullptr);
    return;
  }
}


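// Typical usage (sketch, using only calls from this class; training is deferred until the
// first prediction, see PredictReal()):
//   MAClassifier Clf(MA::SvmClassifierCRbf, /*label_count=*/2, /*regression=*/false);
//   Clf.AddSamples(TrainingTable, TrainingLabels);
//   MC::FloatList Confidences;
//   float Label = Clf.Predict(FeatureVector, Confidences);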
MC::FloatList MAClassifier::Predict(const MC::FloatTable& input_vectors, MC::FloatList& confidences)
{
  if (input_vectors.empty())
  {
    MC_WARNING("No data in the input table for classifier prediction.");
    return MC::FloatList();
  }
  // Check if the table is appropriate for prediction
  int RowSize = input_vectors[0].size();

  if (RowSize == 0)
  {
    MC_WARNING("No data in the input table for classifier prediction.");
    return MC::FloatList();
  }
  for (unsigned int i = 1; i < input_vectors.size(); ++i)
  {
    if ((int)input_vectors[i].size() != RowSize)
    {
      MC_WARNING("Row sizes are inconsistent in the input table for classifier prediction.");
      return MC::FloatList();
    }
    if (FeatureCount != 0 && input_vectors[i].size() != FeatureCount)
    {
      MC_WARNING("Feature count does not match with the expected (%d != %d).", (int)FeatureCount,
                 (int)input_vectors[i].size());
      return MC::FloatList();
    }
  }
  MC::FloatList Labels;

  for (unsigned int i = 0; i < input_vectors.size(); ++i)
  {
    Labels.push_back(Predict(input_vectors[i], confidences));
  }
  return Labels;
}


float MAClassifier::Predict(const MC::FloatList& input_vector, MC::FloatList& confidence)
{
  float Label = PredictReal(input_vector, confidence);

  if (MCIsFloatInfinity(Label) || CachedUniqueLabels.empty() ||
      // (restored condition, lost from the listing: raw values are returned in regression mode)
      Regression)
  {
    return Label;
  }

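  // For classification, PredictReal() may return a value that is not an exact training label
  // (e.g. regression-style outputs from the tree ensembles), so the raw prediction is snapped
  // to the closest cached label: with labels {0, 1, 2} a raw output of 1.3 becomes 1.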
  float FinalResult = *CachedUniqueLabels.begin();
  float Diff = MCAbs(*CachedUniqueLabels.begin()-Label);

  for (auto& label : CachedUniqueLabels)
  {
    float CurrentDiff = MCAbs(label-Label);

    if (CurrentDiff < Diff)
    {
      Diff = CurrentDiff;
      FinalResult = label;
    }
  }
  return FinalResult;
}


float MAClassifier::PredictReal(const MC::FloatList& input_vector, MC::FloatList& confidences)
{
  MC_UNUSED(confidences)
  if (unlikely(CachedSamples.empty()))
  {
    MC_WARNING("The classifier is not trained yet.");
    return MCFloatInfinity();
  }
  if (input_vector.empty())
  {
    MC_WARNING("No data in the input vector for classifier prediction.");
    return MCFloatInfinity();
  }
  if (input_vector.size() != FeatureCount)
  {
    MC_WARNING("Feature count does not match with the expected test vector (%d != %d).",
               (int)FeatureCount, (int)input_vector.size());
    return MCFloatInfinity();
  }
  // Train the classifier
  if (unlikely(!Trained))
    Train();
  // Normalize the input vector if needed
  MC::FloatList NormalizedInputVector = input_vector;

  if (Preprocessing == MA::FeatureStandardization)
  {
    MCStandardizeVector<MC::FloatList, float>(NormalizedInputVector, PreprocessedData);
  } else
  if (Preprocessing == MA::FeatureRescaling)
  {
    MCRescaleVectorByMinMax<MC::FloatList, float>(NormalizedInputVector, PreprocessedData);
  }
  if (Preprocessing == MA::FeatureRescalingToVectorLength)
  {
    MCRescaleVectorByLength<MC::FloatList, float>(NormalizedInputVector);
  }
  /*
   * Do the actual predictions
   */
  if (DlibFunctions.get())
  {
    if (!Regression)
    {
      if (Classifier >= MA::SvmClassifierCLinearEkm && Classifier <= MA::SvmClassifierCLinearDlib)
      {
        DlibSampleDVectorType TestingSample;

        TestingSample.set_size(NormalizedInputVector.size(), 1);
        for (unsigned int i = 0; i < NormalizedInputVector.size(); ++i)
          TestingSample(i) = NormalizedInputVector[i];

        return DlibFunctions->ClassifierDFunction(TestingSample);
      }
      DlibSampleFVectorType TestingSample;

      TestingSample.set_size(NormalizedInputVector.size(), 1);
      for (unsigned int i = 0; i < NormalizedInputVector.size(); ++i)
        TestingSample(i) = NormalizedInputVector[i];

      return DlibFunctions->ClassifierFFunction(TestingSample);
    } else {
      DlibSampleDVectorType TestingSample;

      TestingSample.set_size(NormalizedInputVector.size(), 1);
      for (unsigned int i = 0; i < NormalizedInputVector.size(); ++i)
        TestingSample(i) = NormalizedInputVector[i];

      if (Classifier == MA::RvmClassifierLinear || Classifier == MA::KrRegressionLinear ||
          Classifier == MA::KrlsLinearRegression)
      {
        return DlibFunctions->LinearRegressionFunction(TestingSample);
      } else
      if (Classifier == MA::RvmClassifierSigmoid)
      {
        return DlibFunctions->SigmoidRegressionFunction(TestingSample);
      }
      return DlibFunctions->RbfRegressionFunction(TestingSample);
    }
  }
  if (Classifier == MA::MaxEntropyClassifierL1 || Classifier == MA::MaxEntropyClassifierL2)
  {
    ME_Sample TestingSample;

    for (unsigned int i1 = 0; i1 < NormalizedInputVector.size(); ++i1)
    {
      TestingSample.add_feature(MCToStr(i1), NormalizedInputVector[i1]);
    }
    MaxEntropy->classify(TestingSample);
    return MCStrConvert<float>(TestingSample.label);
  }
  if (Classifier == MA::LwpRegression)
  {
    doubleVec TestingSample(NormalizedInputVector.size());

    for (unsigned int i = 0; i < NormalizedInputVector.size(); ++i)
      TestingSample[i] = (double)NormalizedInputVector[i];
    return (float)Lwpr->predict(TestingSample)[0];
  }
#if defined(__unix__)
  if (Classifier == MA::GaussianProcessRegression)
  {
    // Note: std::vector instead of a variable-length array (a non-standard GCC extension)
    std::vector<double> TestingSample(NormalizedInputVector.size());

    for (unsigned int i = 0; i < NormalizedInputVector.size(); ++i)
      TestingSample[i] = (double)NormalizedInputVector[i];
    return (float)Gpr->GprScPtr->f(TestingSample.data());
  }
#endif
  if (Classifier == MA::PlsRegression)
  {
    Mat2D TestingSample(1, NormalizedInputVector.size());

    for (unsigned int i = 0; i < NormalizedInputVector.size(); ++i)
    {
      TestingSample(0, i) = NormalizedInputVector[i];
    }
    Mat2D Result = Plsr->fitted_values(TestingSample, PlsrComponents);

    return (float)Result(0, 0);
  }
  // Convert the feature vector to OpenCV's version
  cv::Mat TestingSample(1, NormalizedInputVector.size(), CV_32FC1, NormalizedInputVector.data());
  float Result = 0;

  // Classifier prediction
  if (Classifier == MA::NeuralNetworkClassifier)
  {
    cv::Mat Label;

    NeuralNetwork->predict(TestingSample, Label);
    Result = Label.at<float>(0, 0);
  } else
  if (Classifier == MA::NaiveBayesClassifier)
  {
    Result = BayesClassifier->predict(TestingSample);
  } else
  if (Classifier == MA::KNearestNeighborClassifier)
  {
    Result = KNearestClassifier->find_nearest(TestingSample, 1);
  } else
  if (Classifier >= MA::SvmClassifierCLinear && Classifier <= MA::SvmClassifierNuRbf)
  {
    Result = SvmClassifier->predict(TestingSample);
  } else
  if (Classifier == MA::DecisionTreeClassifier)
  {
    Result = (float)DecisionTree->predict(TestingSample)->value;
  } else
  if (Classifier == MA::RandomTreesClassifier)
  {
    if (Regression)
    {
      Result = RandomTrees->predict(TestingSample);
    } else {
      float Confidence = 0;

      Result = RandomTrees->predict_with_confidence(TestingSample, Confidence);
      // TODO: The confidence measure does not seem to work for random forest.
//      confidences.push_back(Confidence);
    }
  } else
  if (Classifier == MA::ExtremeRandomTreesClassifier)
  {
    Result = ExtremeRandomTrees->predict(TestingSample);
  } else
  if (Classifier == MA::GradientBoostedTreesClassifier)
  {
    Result = GradientBoostedTrees->predict(TestingSample);
  } else
  if (Classifier == MA::ExpectationMaximization)
  {
    cv::Vec2d Response;

    Response = EmClassifier->Classifier->predict(TestingSample);
    Result = EmClassifier->LabelMapping[(float)Response[1]];
    // TODO: The confidence measure is not in the range of [0; 1] and sometimes insane.
    // Recheck later when porting to OpenCV 3.x. Don't forget about MAModel::Predict.
//    confidences.push_back((float)Response[0]);
  }
  return Result;
}


void MAClassifier::Train()
{
  if (Trained)
    return;

  // Normalize the training samples if needed
  MC::FloatTable NormalizedSamples = CachedSamples;

  PreprocessedData.clear();
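  // The preprocessing below fills PreprocessedData with the per-column statistics,
  // which PredictReal() reuses to transform every incoming test vector the same way
  // as the training samples.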
  if (Preprocessing == MA::FeatureStandardization)
  {
    MCStandardizeTablePerColumns<float, MC::FloatList, MC::FloatTable>(NormalizedSamples, PreprocessedData);
  } else
  if (Preprocessing == MA::FeatureRescaling)
  {
    MCRescaleTablePerColumnsByMinMax<float, MC::FloatList, MC::FloatTable>(NormalizedSamples, PreprocessedData);
  }
  if (Preprocessing == MA::FeatureRescalingToVectorLength)
  {
    MCRescaleTablePerRowByRowLength<float, MC::FloatList, MC::FloatTable>(NormalizedSamples);
  }

  /*
   * Initial training (OpenCV classifiers)
   */
  if (Classifier == MA::NeuralNetworkClassifier || Classifier == MA::NaiveBayesClassifier ||
      Classifier == MA::KNearestNeighborClassifier ||
      (Classifier >= MA::SvmClassifierCLinear && Classifier <= MA::SvmClassifierNuRbf) ||
      (Classifier >= MA::DecisionTreeClassifier && Classifier <= MA::ExpectationMaximization))
  {
    // Convert the feature vector to OpenCV's version
    cv::Mat TrainingSamples(0, FeatureCount, CV_32FC1);
    cv::Mat TrainingLabels(CachedLabels, false);
    float* Priors = nullptr;

    if (!PrioritizedClasses.empty())
    {
      MC::FloatList UniqueLabels(CachedUniqueLabels.begin(), CachedUniqueLabels.end());

      PriorityLabels = MC::FloatList(CachedUniqueLabels.size(), 0.1);
      for (unsigned int i = 0; i < PrioritizedClasses.size(); ++i)
        for (unsigned int i1 = 0; i1 < UniqueLabels.size(); ++i1)
        {
          if (PrioritizedClasses[i] == UniqueLabels[i1])
          {
            PriorityLabels[i1] = 0.15;
          }
        }
      Priors = PriorityLabels.data();
    }
    for (unsigned int i = 0; i < NormalizedSamples.size(); ++i)
    {
      cv::Mat Sample(1, FeatureCount, CV_32FC1, NormalizedSamples[i].data());

      TrainingSamples.push_back(Sample);
    }
    // Randomization before every training
    InitOpenCVRNGWrapper.Reset();
    MC_TRY_BEGIN
    // Classifier training
    if (Classifier == MA::NeuralNetworkClassifier)
    {
      cv::Mat Layers(4, 1, CV_32SC1);

      Layers.row(0) = cv::Scalar(FeatureCount); // -V525 (PVS Studio suppression)
      Layers.row(1) = cv::Scalar(FeatureCount);
      Layers.row(2) = cv::Scalar(FeatureCount);
      Layers.row(3) = cv::Scalar(1);
      NeuralNetwork->create(Layers);
      NeuralNetwork->train(TrainingSamples, TrainingLabels, cv::Mat());
    } else
    if (Classifier == MA::NaiveBayesClassifier)
    {
      BayesClassifier->train(TrainingSamples, TrainingLabels, cv::Mat(), cv::Mat(), false);
    } else
    if (Classifier == MA::KNearestNeighborClassifier)
    {
      KNearestClassifier->train(TrainingSamples, TrainingLabels, cv::Mat(), Regression, 32, false);
    } else
    if (Classifier >= MA::SvmClassifierCLinear && Classifier <= MA::SvmClassifierNuRbf)
    {
      if (SvmAutoTrain)
      {
        SvmClassifier->train_auto(TrainingSamples, TrainingLabels, cv::Mat(), cv::Mat(),
                                  *SvmClassifierParams.get());
        if (Classifier == MA::SvmClassifierCLinear || Classifier == MA::SvmClassifierCRbf)
        {
          MC_LOG("C parameter (SVM autotraining): %1.6f", SvmClassifier->get_params().C);
        }
        if (Classifier == MA::SvmClassifierNuLinear || Classifier == MA::SvmClassifierNuRbf)
          MC_LOG("ν (nu) parameter (SVM autotraining): %1.6f", SvmClassifier->get_params().nu);
        if (Classifier == MA::SvmClassifierCRbf || Classifier == MA::SvmClassifierNuRbf)
          MC_LOG("γ (gamma) parameter (SVM autotraining): %1.6f", SvmClassifier->get_params().gamma);
      } else {
        SvmClassifier->train(TrainingSamples, TrainingLabels, cv::Mat(), cv::Mat(),
                             *SvmClassifierParams.get());
      }
    } else
    if (Classifier == MA::DecisionTreeClassifier)
    {
      cv::Mat DtVarType(FeatureCount+1, 1, CV_8U);
      CvDTreeParams Params(TreeMaxDepth, // max depth
                           TreeNodeSampleLimit, // min sample count
                           0.001f, // regression accuracy: N/A here
                           true, // compute surrogate split, no missing data (variable importance calculation)
                           LabelCount <= 1 ? 2 : LabelCount, // max number of categories
                           1, // k-fold cross-validation to prune a tree
                           false, // more aggressive pruning
                           true, // truncate pruned branches
                           Priors // the array of priors
                          );

      DtVarType.setTo(cv::Scalar(CV_VAR_NUMERICAL)); // all inputs are numerical
      // this is a classification problem (i.e. predict a discrete number of class
      // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL
      if (!Regression)
        DtVarType.at<uchar>(FeatureCount, 0) = CV_VAR_CATEGORICAL;
      else
        DtVarType.at<uchar>(FeatureCount, 0) = CV_VAR_NUMERICAL;
      DecisionTree->train(TrainingSamples, CV_ROW_SAMPLE, TrainingLabels, cv::Mat(), cv::Mat(),
                          DtVarType, cv::Mat(), Params);
    } else
    if (Classifier == MA::RandomTreesClassifier || Classifier == MA::ExtremeRandomTreesClassifier)
    {
      cv::Mat RtVarType(FeatureCount+1, 1, CV_8U);
      bool VariableImportance = (Classifier == MA::RandomTreesClassifier ? true : false);
      CvRTParams Params(TreeMaxDepth, TreeNodeSampleLimit, 0.01, VariableImportance, 1024,
                        Priors, VariableImportance, 0, RtMaxForestSize, 0.01,
                        CV_TERMCRIT_ITER+CV_TERMCRIT_EPS);

      RtVarType.setTo(cv::Scalar(CV_VAR_NUMERICAL)); // all inputs are numerical
      // this is a classification problem (i.e. predict a discrete number of class
      // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL
      if (!Regression)
        RtVarType.at<uchar>(FeatureCount, 0) = CV_VAR_CATEGORICAL;
      else
        RtVarType.at<uchar>(FeatureCount, 0) = CV_VAR_NUMERICAL;

      if (Classifier == MA::RandomTreesClassifier)
      {
        RandomTrees->train(TrainingSamples, CV_ROW_SAMPLE, TrainingLabels, cv::Mat(), cv::Mat(),
                           RtVarType, cv::Mat(), Params);
      } else {
        ExtremeRandomTrees->train(TrainingSamples, CV_ROW_SAMPLE, TrainingLabels, cv::Mat(), cv::Mat(),
                                  RtVarType, cv::Mat(), Params);
      }
    } else
    if (Classifier == MA::GradientBoostedTreesClassifier)
    {
      cv::Mat GbtVarType(FeatureCount+1, 1, CV_8U);
      CvGBTreesParams Params(CvGBTrees::SQUARED_LOSS, RtMaxForestSize, 0.8, 0.01, TreeMaxDepth, false);

      GbtVarType.setTo(cv::Scalar(CV_VAR_NUMERICAL)); // all inputs are numerical
      // this is a classification problem (i.e. predict a discrete number of class
      // outputs) so reset the last (+1) output var_type element to CV_VAR_CATEGORICAL
      if (!Regression)
        GbtVarType.at<uchar>(FeatureCount, 0) = CV_VAR_CATEGORICAL;
      else
        GbtVarType.at<uchar>(FeatureCount, 0) = CV_VAR_NUMERICAL;

      GradientBoostedTrees->train(TrainingSamples, CV_ROW_SAMPLE, TrainingLabels, cv::Mat(), cv::Mat(),
                                  GbtVarType, cv::Mat(), Params);
    } else
    if (Classifier == MA::ExpectationMaximization)
    {
      // Sort out the samples into one mixture set/label
      MC::FloatList Labels(CachedUniqueLabels.begin(), CachedUniqueLabels.end());
      cv::Mat Weights(1, Labels.size(), CV_64FC1, cv::Scalar(0));
      std::vector<cv::Mat> Covs;
      cv::Mat Means(Labels.size(), FeatureCount, CV_64FC1);

      Covs.resize(Labels.size());
      EmClassifier->LabelMapping.clear();
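      // LabelMapping translates the EM mixture component index back to the original class
      // label; PredictReal() uses it to map predict()'s component index to a label.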
      for (unsigned int i = 0; i < Labels.size(); ++i)
      {
        cv::Mat Samples(0, FeatureCount, CV_64FC1);

        EmClassifier->LabelMapping.insert(std::make_pair((float)i, Labels[i]));
        for (unsigned int i1 = 0; i1 < NormalizedSamples.size(); ++i1)
        {
          if (CachedLabels[i1] == Labels[i])
          {
            const cv::Mat Sample(1, FeatureCount, CV_32FC1, NormalizedSamples[i1].data());
            cv::Mat TempSample;

            Sample.convertTo(TempSample, CV_64FC1);
            Samples.push_back(TempSample);
          }
        }
        cv::calcCovarMatrix(Samples, Covs[i], Means.row(i),
                            CV_COVAR_NORMAL+CV_COVAR_ROWS+CV_COVAR_SCALE, CV_64FC1);
        if (!PrioritizedClasses.empty() && MCContainerContains(PrioritizedClasses, Labels[i]))
        {
          Weights.at<double>(i) = 0.15;
        } else {
          Weights.at<double>(i) = 0.1;
        }
      }
      // Note: Default parameter EM::COV_MAT_DIAGONAL is changed to cv::EM::COV_MAT_GENERIC
      // because of http://stackoverflow.com/questions/23485982
      EmClassifier->Classifier.reset(new cv::EM(LabelCount, cv::EM::COV_MAT_GENERIC));
      EmClassifier->Classifier->trainE(TrainingSamples, Means, Covs, Weights);
    }
    MC_CATCH_BEGIN
    MC_WARNING("Fatal error on OpenCV side while training a classifier!");
    return;
    MC_CATCH_END
    Trained = true;
    return;
  }
  /*
   * Initial training (maximum entropy)
   */
  if ((Classifier == MA::MaxEntropyClassifierL1 || Classifier == MA::MaxEntropyClassifierL2) &&
      !MaxEntropy.get())
  {
    MaxEntropy.reset(new ME_Model);
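    // Each feature is registered under the string form of its column index; PredictReal()
    // uses the same encoding when classifying, so the two must stay in sync.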
    // Build training samples
    for (unsigned int i = 0; i < NormalizedSamples.size(); ++i)
    {
      ME_Sample NewSample(MCToStr(CachedLabels[i]));

      for (unsigned int i1 = 0; i1 < NormalizedSamples[0].size(); ++i1)
      {
        NewSample.add_feature(MCToStr(i1), NormalizedSamples[i][i1]);
      }
      MaxEntropy->add_training_sample(NewSample);
    }
    // Training
    if (Classifier == MA::MaxEntropyClassifierL1)
      MaxEntropy->use_l1_regularizer((double)MeL1);
    else
    if (Classifier == MA::MaxEntropyClassifierL2)
      MaxEntropy->use_l2_regularizer((double)MeL2);
    MaxEntropy->train();
    Trained = true;
    return;
  }
  /*
   * Initial training (lwp regression)
   */
  if (Classifier == MA::LwpRegression && !Lwpr.get())
  {
    MC_TRY_BEGIN
    Lwpr.reset(new LWPR_Object(FeatureCount, 1));
    MC_CATCH_BEGIN
    // Note: warn and bail out instead of the original printf()+exit(1), matching the
    // error handling style used elsewhere in this file
    MC_WARNING("Fatal error while creating the LWPR model!");
    return;
    MC_CATCH_END
    Lwpr->setInitD(50);
    /* Set init_alpha to 250 in all elements */
    Lwpr->setInitAlpha(250);
    Lwpr->wGen(LwprAlpha);
    std::vector<doubleVec> TrainingSamples;
    std::vector<doubleVec> TrainingLabels;

    // Convert the training samples/labels
    for (unsigned int i = 0; i < NormalizedSamples.size(); ++i)
    {
      TrainingSamples.push_back(doubleVec(FeatureCount));

      for (unsigned int i1 = 0; i1 < FeatureCount; ++i1)
      {
        TrainingSamples[i][i1] = (double)NormalizedSamples[i][i1];
      }
      TrainingLabels.push_back(doubleVec(1));
      TrainingLabels[i][0] = (double)CachedLabels[i];
    }
    // Training
    float Error = 1000.0;
    int Iterations = 0;

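    // The LWPR model is updated incrementally: keep feeding all samples until the summed
    // squared training error drops below 1e-5 or 1000 passes have been made.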
    while (Iterations < 1000 && Error > 0.00001)
    {
      Error = 0.0;
      Iterations++;
      for (unsigned int i = 0; i < NormalizedSamples.size(); ++i)
        Lwpr->update(TrainingSamples[i], TrainingLabels[i]);
      for (unsigned int i = 0; i < NormalizedSamples.size(); ++i)
      {
        float CurrentError = (float)Lwpr->predict(TrainingSamples[i])[0]-CachedLabels[i];

        Error += CurrentError*CurrentError;
      }
    }
//    printf("Iterations: %d - Final error: %1.10f\n", Iterations, (float)Error);
    Trained = true;
    return;
  }
#if defined(__unix__)
  /*
   * Initial training (Gaussian process regression)
   */
  if (Classifier == MA::GaussianProcessRegression && !Gpr->GprScPtr.get())
  {
    Gpr->GprScPtr.reset(new libgp::GaussianProcess(FeatureCount, "CovSum ( CovSEiso, CovNoise)"));
    Eigen::VectorXd params(Gpr->GprScPtr->covf().get_param_dim());

    params << 0.0, 0.0, -2.0;
    // Set the (log-scale) hyperparameters of the covariance function
    Gpr->GprScPtr->covf().set_loghyper(params);
    // Convert the training samples/labels
    for (unsigned int i = 0; i < NormalizedSamples.size(); ++i)
    {
      // Note: std::vector instead of a variable-length array (a non-standard GCC extension)
      std::vector<double> TrainingSample(FeatureCount);

      for (unsigned int i1 = 0; i1 < FeatureCount; ++i1)
      {
        TrainingSample[i1] = (double)NormalizedSamples[i][i1];
      }
      Gpr->GprScPtr->add_pattern(TrainingSample.data(), (double)CachedLabels[i]);
    }
    Trained = true;
    return;
  }
#endif
  /*
   * Initial training (pls regression)
   */
  if (Classifier == MA::PlsRegression && !Plsr.get())
  {
    Plsr.reset(new PLS_Model);
    Mat2D TrainingSamples(NormalizedSamples.size(), FeatureCount);
    Mat2D TrainingLabels(CachedLabels.size(), 1);

//    printf("Pls: Sample matrix size: %dx%d\n", NormalizedSamples.size(), FeatureCount);
    // Convert the training samples/labels
    for (unsigned int i = 0; i < NormalizedSamples.size(); ++i)
    {
      for (unsigned int i1 = 0; i1 < FeatureCount; ++i1)
      {
        TrainingSamples(i, i1) = NormalizedSamples[i][i1];
      }
      TrainingLabels(i, 0) = CachedLabels[i];
    }
    if (PlsrComponents == 0)
    {
      int LastComponentCount = -1;

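      // Heuristic component search: try component counts 3, 8, 13, ... and ask the model for
      // the leave-one-out optimal count each time; once that count is consistent across the
      // outputs and stops changing between attempts, accept it (plus a margin of 2),
      // otherwise fall back to 10.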
      for (int i = 3; i < 100; i += 5)
      {
//        printf("Pls: Trying component count: %d\n", i);
        Plsr->initialize((int)TrainingSamples.cols(), (int)TrainingLabels.cols(), i);
        Plsr->plsr(TrainingSamples, TrainingLabels, KERNEL_TYPE2);
        Rowi Components = Plsr->loo_optimal_num_components(TrainingSamples, TrainingLabels);
        bool Consistent = true;

        for (int i1 = 1; i1 < Components.size(); ++i1)
        {
          if (Components(0) != Components(i1))
          {
            Consistent = false;
            break;
          }
        }
        if (!Consistent)
          continue;
        if ((LastComponentCount == -1 && Components(0) > 0) ||
            (LastComponentCount != -1 && Components(0) != LastComponentCount))
        {
          LastComponentCount = Components(0);
          continue;
        }
        if (LastComponentCount != -1 && Components(0) == LastComponentCount)
          break;
      }
      if (LastComponentCount == -1)
      {
        LastComponentCount = 10;
      } else {
        LastComponentCount += 2;
      }
      PlsrComponents = LastComponentCount;
//      printf("Final components: %d\n", LastComponentCount);
    }
    Plsr->initialize((int)TrainingSamples.cols(), (int)TrainingLabels.cols(), PlsrComponents);
    Plsr->plsr(TrainingSamples, TrainingLabels, KERNEL_TYPE2);
    Trained = true;
    return;
  }
  /*
   * Initial training (Dlib classifier methods)
   */
  if (!DlibFunctions.get() && !Regression)
  {
    DlibFunctions.reset(new DlibWrapper);
    if (Classifier >= MA::SvmClassifierCLinearEkm && Classifier <= MA::SvmClassifierCLinearDlib)
    {
      // Build training samples
      DlibDSampleTableType TrainingSamples;
      MC::DoubleList TrainingLabels;

      for (unsigned int i = 0; i < NormalizedSamples.size(); ++i)
      {
        DlibSampleDVectorType List;

        List.set_size(NormalizedSamples[0].size(), 1);
        for (unsigned int i1 = 0; i1 < NormalizedSamples[0].size(); ++i1)
        {
          List(i1) = NormalizedSamples[i][i1];
        }
        TrainingSamples.push_back(List);
        TrainingLabels.push_back((double)CachedLabels[i]);
      }
      DlibDMultiTrainerType MultiTrainer;

      // Training
      if (Classifier == MA::SvmClassifierCLinearEkm)
      {
        SvmCLinearEkmTrainerType SvmTrainer;

        SvmTrainer.set_epsilon(DlibEpsilon);
        SvmTrainer.set_c(SvmC);
        SvmTrainer.set_kernel(DlibDLinearKernelType());
        MultiTrainer.set_trainer(SvmTrainer);
      } else
      if (Classifier == MA::SvmClassifierCRbfEkm)
      {
        SvmCRbfEkmTrainerType SvmTrainer;

        SvmTrainer.set_epsilon(DlibEpsilon);
        SvmTrainer.set_c(SvmC);
        SvmTrainer.set_kernel(DlibDRbfKernelType(RvmRbfGamma));
        MultiTrainer.set_trainer(SvmTrainer);
      } else
      if (Classifier == MA::SvmClassifierCLinearDcd)
      {
        SvmCLinearDcdTrainerType SvmTrainer;

        SvmTrainer.set_epsilon(DlibEpsilon);
        SvmTrainer.set_c(SvmC);
        MultiTrainer.set_trainer(SvmTrainer);
      } else
      if (Classifier == MA::SvmClassifierCLinearDlib)
      {
        SvmCLinearTrainerType SvmTrainer;

        SvmTrainer.set_epsilon(DlibEpsilon);
        SvmTrainer.set_c(SvmC);
        MultiTrainer.set_trainer(SvmTrainer);
      }
      DlibFunctions->ClassifierDFunction = MultiTrainer.train(TrainingSamples, TrainingLabels);
    } else {
      // Build training samples
      DlibFSampleTableType TrainingSamples;
      MC::DoubleList TrainingLabels;

      for (unsigned int i = 0; i < NormalizedSamples.size(); ++i)
      {
        DlibSampleFVectorType List;

        List.set_size(NormalizedSamples[0].size(), 1);
        for (unsigned int i1 = 0; i1 < NormalizedSamples[0].size(); ++i1)
        {
          List(i1) = NormalizedSamples[i][i1];
        }
        TrainingSamples.push_back(List);
        TrainingLabels.push_back((double)CachedLabels[i]);
      }
      DlibFMultiTrainerType MultiTrainer;

      // Training
      if (Classifier == MA::RvmClassifierLinear)
      {
        RvmLinearTrainerType RvmTrainer;

        RvmTrainer.set_epsilon(DlibEpsilon);
        RvmTrainer.set_kernel(DlibFLinearKernelType());
        MultiTrainer.set_trainer(RvmTrainer);
      } else
      if (Classifier == MA::RvmClassifierSigmoid)
      {
        RvmSigmoidTrainerType RvmTrainer;

        RvmTrainer.set_epsilon(DlibEpsilon);
        RvmTrainer.set_kernel(DlibFSigmoidKernelType(RvmSigmoidGamma, -1.0));
        MultiTrainer.set_trainer(RvmTrainer);
      } else
      if (Classifier == MA::RvmClassifierRbf)
      {
        RvmRbfTrainerType RvmTrainer;

        RvmTrainer.set_epsilon(DlibEpsilon);
        RvmTrainer.set_kernel(DlibFRbfKernelType(RvmRbfGamma));
        MultiTrainer.set_trainer(RvmTrainer);
      } else
      if (Classifier == MA::KrRegressionLinear)
      {
        KrrLinearTrainerType KrrTrainer;

        KrrTrainer.set_lambda(KrrLambda);
        KrrTrainer.set_kernel(DlibFLinearKernelType());
        MultiTrainer.set_trainer(KrrTrainer);
      } else
      if (Classifier == MA::KrRegressionRbf)
      {
        KrrRbfTrainerType KrrTrainer;

        KrrTrainer.set_lambda(KrrLambda);
        KrrTrainer.set_kernel(DlibFRbfKernelType(KrrGamma));
        MultiTrainer.set_trainer(KrrTrainer);
      }
      DlibFunctions->ClassifierFFunction = MultiTrainer.train(TrainingSamples, TrainingLabels);
    }
    Trained = true;
    return;
  }
  /*
   * Initial training (Dlib regression methods)
   */
  if (!DlibFunctions.get())
  {
    DlibFunctions.reset(new DlibWrapper);
    // Build training samples
    DlibDSampleTableType TrainingSamples;
    MC::DoubleList TrainingLabels;

    // Kernel recursive least squares regression must be trained on a per-sample basis
    if (Classifier != MA::KrlsLinearRegression && Classifier != MA::KrlsRbfRegression)
    {
      for (unsigned int i = 0; i < NormalizedSamples.size(); ++i)
      {
        DlibSampleDVectorType List;

        List.set_size(NormalizedSamples[0].size(), 1);
        for (unsigned int i1 = 0; i1 < NormalizedSamples[0].size(); ++i1)
        {
          List(i1) = NormalizedSamples[i][i1];
        }
        TrainingSamples.push_back(List);
        TrainingLabels.push_back((double)CachedLabels[i]);
      }
    }
    // Training
    if (Classifier == MA::RvmClassifierLinear)
    {
      LinearRegressionTrainerType RvmRegressionTrainer;

      RvmRegressionTrainer.set_epsilon(DlibEpsilon);
      RvmRegressionTrainer.set_kernel(DlibDLinearKernelType());
      DlibFunctions->LinearRegressionFunction = RvmRegressionTrainer.train(TrainingSamples, TrainingLabels);
    } else
    if (Classifier == MA::RvmClassifierSigmoid)
    {
      SigmoidRegressionTrainerType RvmRegressionTrainer;

      RvmRegressionTrainer.set_epsilon(DlibEpsilon);
      RvmRegressionTrainer.set_kernel(DlibDSigmoidKernelType(RvmSigmoidGamma, -1.0));
      DlibFunctions->SigmoidRegressionFunction = RvmRegressionTrainer.train(TrainingSamples, TrainingLabels);
    } else
    if (Classifier == MA::RvmClassifierRbf)
    {
      RbfRegressionTrainerType RvmRegressionTrainer;

      RvmRegressionTrainer.set_epsilon(DlibEpsilon);
      RvmRegressionTrainer.set_kernel(DlibDRbfKernelType(RvmRbfGamma));
      DlibFunctions->RbfRegressionFunction = RvmRegressionTrainer.train(TrainingSamples, TrainingLabels);
    } else
    if (Classifier == MA::KrRegressionLinear)
    {
      KrrLinearRegressionTrainerType KrrRegressionTrainer;

      KrrRegressionTrainer.set_lambda(KrrLambda);
      KrrRegressionTrainer.set_kernel(DlibDLinearKernelType());
      DlibFunctions->LinearRegressionFunction = KrrRegressionTrainer.train(TrainingSamples, TrainingLabels);
    } else
    if (Classifier == MA::KrRegressionRbf)
    {
      KrrRbfRegressionTrainerType KrrRegressionTrainer;

      KrrRegressionTrainer.set_lambda(KrrLambda);
      KrrRegressionTrainer.set_kernel(DlibDRbfKernelType(KrrGamma));
      DlibFunctions->RbfRegressionFunction = KrrRegressionTrainer.train(TrainingSamples, TrainingLabels);
    } else
    if (Classifier == MA::KrlsLinearRegression)
    {
      KrlsLinearRegressionTrainerType KrlsLinearRegressionTrainer(DlibDLinearKernelType(), KrlsTolerance);

      for (unsigned int i = 0; i < NormalizedSamples.size(); ++i)
      {
        DlibSampleDVectorType List;

        List.set_size(NormalizedSamples[0].size(), 1);
        for (unsigned int i1 = 0; i1 < NormalizedSamples[0].size(); ++i1)
        {
          List(i1) = NormalizedSamples[i][i1];
        }
        KrlsLinearRegressionTrainer.train(List, CachedLabels[i]);
      }
      DlibFunctions->LinearRegressionFunction = KrlsLinearRegressionTrainer.get_decision_function();
    } else
    if (Classifier == MA::KrlsRbfRegression)
    {
      KrlsRbfRegressionTrainerType KrlsRbfRegressionTrainer(DlibDRbfKernelType(KrlsGamma), KrlsTolerance);

      for (unsigned int i = 0; i < NormalizedSamples.size(); ++i)
      {
        DlibSampleDVectorType List;

        List.set_size(NormalizedSamples[0].size(), 1);
        for (unsigned int i1 = 0; i1 < NormalizedSamples[0].size(); ++i1)
        {
          List(i1) = NormalizedSamples[i][i1];
        }
        KrlsRbfRegressionTrainer.train(List, CachedLabels[i]);
      }
      DlibFunctions->RbfRegressionFunction = KrlsRbfRegressionTrainer.get_decision_function();
    }
    Trained = true;
    return;
  }
}


MA::CvResultsType MAClassifier::CrossValidate(int iterations, MA::ClassifierCrossValidationType cv_type,
                                              float cv_parameter, const MC::FloatTable& samples,
                                              const MC::FloatList& labels,
                                              const MC::FloatTable& validation_samples,
                                              const MC::FloatList& validation_labels,
                                              float regression_accuracy)
{
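  // The meaning of cv_parameter depends on cv_type:
  //   - RandomLabelCrossValidation: training samples kept per label (>= 1)
  //   - KFoldCrossValidation: the number of folds k (>= 2)
  //   - MonteCarloCrossValidation: training set size as a percentage (1-99)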
  if (cv_type == MA::RandomLabelCrossValidation && cv_parameter < 1)
  {
    MC_WARNING("At least one sample per label is needed for random label cross-validation (%d < 1)",
               (int)cv_parameter);
    return MA::CvResultsType();
  }
  if (cv_type == MA::KFoldCrossValidation && cv_parameter < 2)
  {
    MC_WARNING("At least 2-fold cross-validation can be done (%d < 2)", (int)cv_parameter);
    return MA::CvResultsType();
  }
  if (cv_type == MA::MonteCarloCrossValidation && (cv_parameter < 1 || cv_parameter > 99))
  {
    MC_WARNING("Monte-Carlo cross-validation must have the training set size between 1 < %d < 100)",
               (int)cv_parameter);
    return MA::CvResultsType();
  }
  if (samples.size() != labels.size())
  {
    MC_WARNING("Label and sample counts are not consistent (%d != %d)", (int)labels.size(),
               (int)samples.size());
    return MA::CvResultsType();
  }
  MC::FloatTable* SampleFolds = new MC::FloatTable[(unsigned int)cv_parameter];
  MC::FloatList* LabelFolds = new MC::FloatList[(unsigned int)cv_parameter];
  MC::IntList* TrainingSampleIndexFolds = new MC::IntList[(unsigned int)cv_parameter];
  unsigned int MonteCarloTrainingSetSize = (int)(cv_parameter*samples.size() / 100.0);
  const MC::FloatList UniqueTestingLabels = MCUniqueItemsFromContainer<MC::FloatList>(labels);
  const MC::FloatList UniqueValidationLabels = MCUniqueItemsFromContainer<MC::FloatList>(validation_labels);
  int TestingLabelCount = UniqueTestingLabels.size();
  int ValidationLabelCount = UniqueValidationLabels.size();
  MA::FloatTableList TestingResultsTableList;
  MA::FloatTableList ValidationResultsTableList;
  MC::FloatList SampleRanks(samples.size(), 0.0);

  // Note: K-fold cross-validation needs to run cv_parameter folds per iteration
  for (int iter = 0; iter < iterations*(cv_type == MA::KFoldCrossValidation ? (int)cv_parameter : 1);
       ++iter)
  {
    MC::FloatTable TrainingSamples;
    MC::FloatList TrainingLabels;
    MC::FloatTable TestingSamples;
    MC::FloatList TestingLabels;
    MC::IntList Gambling;
    MC::FloatTable TestingResultsTable;
    MC::FloatTable ValidationResultsTable;
    MC::IntList TestingLabelSamples(TestingLabelCount, 0);
    MC::IntList ValidationLabelSamples(ValidationLabelCount, 0);
    MC::IntList TrainingSampleIndexes;

    // Initialize the testing results table
    for (int i = 0; i < TestingLabelCount; ++i)
      TestingResultsTable.push_back(MC::FloatList(TestingLabelCount, 0.0));
    // Initialize the validation results table
    if (!validation_samples.empty())
    {
      for (int i = 0; i < ValidationLabelCount; ++i)
        ValidationResultsTable.push_back(MC::FloatList(ValidationLabelCount, 0.0));
    }
    // Build the training/testing sets for random label cross-validation
    if (cv_type == MA::RandomLabelCrossValidation)
    {
      MA::FloatIntMap LabelsMap;

      // Fill the label map
      for (unsigned int i = 0; i < UniqueTestingLabels.size(); ++i)
      {
        LabelsMap.insert(std::make_pair(UniqueTestingLabels[i], 0));
      }
      // Shuffle the indexes
      for (unsigned int i = 0; i < samples.size(); ++i)
      {
        Gambling.push_back(i);
      }
      MCRandomizeContainer<int, MC::IntList>(Gambling);
      for (unsigned int i = 0; i < samples.size(); ++i)
      {
        int CurrentIndex = Gambling[i];

        if (LabelsMap[labels[CurrentIndex]] < (int)cv_parameter)
        {
          LabelsMap[labels[CurrentIndex]]++;
          TrainingLabels.push_back(labels[CurrentIndex]);
          TrainingSamples.push_back(samples[CurrentIndex]);
          TrainingSampleIndexes.push_back(CurrentIndex);
        } else {
          TestingLabels.push_back(labels[CurrentIndex]);
          TestingSamples.push_back(samples[CurrentIndex]);
        }
      }
    }
    // Build the training/testing sets for k-fold cross-validation
    if (cv_type == MA::KFoldCrossValidation)
    {
      // Shuffle the samples/labels
      if (iter % (int)cv_parameter == 0)
      {
        for (int i = 0; i < (int)cv_parameter; ++i)
        {
          SampleFolds[i].clear();
          LabelFolds[i].clear();
          TrainingSampleIndexFolds[i].clear();
        }
        for (unsigned int i = 0; i < samples.size(); ++i)
        {
          Gambling.push_back(i);
        }
        MCRandomizeContainer<int, MC::IntList>(Gambling);
        for (unsigned int i = 0; i < samples.size(); ++i)
        {
          SampleFolds[i % (int)cv_parameter].push_back(samples[Gambling[i]]);
          LabelFolds[i % (int)cv_parameter].push_back(labels[Gambling[i]]);
          TrainingSampleIndexFolds[i % (int)cv_parameter].push_back(Gambling[i]);
        }
      }
      // Apply the current fold
      for (int i = 0; i < (int)cv_parameter; ++i)
      {
        if (iter % (int)cv_parameter == i)
        {
          MCMergeContainers(TestingSamples, SampleFolds[i]);
          MCMergeContainers(TestingLabels, LabelFolds[i]);
        } else {
          MCMergeContainers(TrainingSamples, SampleFolds[i]);
          MCMergeContainers(TrainingLabels, LabelFolds[i]);
          MCMergeContainers(TrainingSampleIndexes, TrainingSampleIndexFolds[i]);
        }
      }
    }
    // Build the training/testing sets for Monte-Carlo cross-validation
    if (cv_type == MA::MonteCarloCrossValidation)
    {
      // Shuffle the indexes
      for (unsigned int i = 0; i < samples.size(); ++i)
      {
        Gambling.push_back(i);
      }
      MCRandomizeContainer<int, MC::IntList>(Gambling);
      for (unsigned int i = 0; i < samples.size(); ++i)
      {
        // Note: '<' instead of the original '<=' to avoid an off-by-one extra training sample
        if (i < MonteCarloTrainingSetSize)
        {
          TrainingLabels.push_back(labels[Gambling[i]]);
          TrainingSamples.push_back(samples[Gambling[i]]);
          TrainingSampleIndexes.push_back(Gambling[i]);
        } else {
          TestingLabels.push_back(labels[Gambling[i]]);
          TestingSamples.push_back(samples[Gambling[i]]);
        }
      }
    }
    // Reset the classifier if needs be
    if (Trained)
      Reset();
    // Add samples and predict
    AddSamples(TrainingSamples, TrainingLabels);
    // Calculate the accuracy on the testing set
    MC::FloatList CurrentResults;

    if (!TestingSamples.empty())
    {
      MC::FloatList Confidence;

      CurrentResults = Predict(TestingSamples, Confidence);
      for (unsigned int i = 0; i < TestingLabels.size(); ++i)
      {
        if (!Regression)
        {
          int TestingLabelIndex = MCItemIndicesInContainer(UniqueTestingLabels, TestingLabels[i])[0];
          int PredictedLabelIndex = MCItemIndicesInContainer(UniqueTestingLabels, CurrentResults[i])[0];

          if (TestingLabelIndex >= 0)
          {
            if (PredictedLabelIndex >= 0)
            {
              TestingResultsTable[TestingLabelIndex][PredictedLabelIndex]++;
            }
            TestingLabelSamples[TestingLabelIndex]++;
          }
        } else {
          if (CurrentResults[i] >= TestingLabels[i]-regression_accuracy &&
              CurrentResults[i] <= TestingLabels[i]+regression_accuracy)
          {
            int TestingLabelIndex = MCItemIndicesInContainer(UniqueTestingLabels, TestingLabels[i])[0];
            int PredictedLabelIndex = MCItemIndicesInContainer(UniqueTestingLabels, CurrentResults[i])[0];

            if (TestingLabelIndex >= 0)
            {
              if (PredictedLabelIndex >= 0)
              {
                TestingResultsTable[TestingLabelIndex][PredictedLabelIndex]++;
              }
              TestingLabelSamples[TestingLabelIndex]++;
            }
          }
        }
      }
      // Update the sample ranks
      // 1. Calculate the final accuracy on the testing set in percentage
      // (stack-allocated stat functor instead of the original leaking heap allocation)
      MCArithmeticSum<float> SumOperation;
      float Result = MCCalculateDiagonalStatInTable<float>(TestingResultsTable, SumOperation);

      Result = Result / CurrentResults.size()*100;
      // 2. Add the accuracy number to all used training samples as a performance measure
      for (unsigned int i = 0; i < TrainingSampleIndexes.size(); ++i)
      {
        SampleRanks[TrainingSampleIndexes[i]] += Result;
      }
    }
    // Check the hypothesis on the validation set
    if (!validation_samples.empty())
    {
      MC::FloatList Confidence;

      CurrentResults = Predict(validation_samples, Confidence);
      for (unsigned int i = 0; i < validation_labels.size(); ++i)
      {
        if (!Regression)
        {
          int ValidationLabelIndex = MCItemIndicesInContainer(UniqueValidationLabels, validation_labels[i])[0];
          int PredictedLabelIndex = MCItemIndicesInContainer(UniqueValidationLabels, CurrentResults[i])[0];

          if (ValidationLabelIndex >= 0)
          {
            if (PredictedLabelIndex >= 0)
            {
              ValidationResultsTable[ValidationLabelIndex][PredictedLabelIndex]++;
            }
            ValidationLabelSamples[ValidationLabelIndex]++;
          }
        } else {
          if (CurrentResults[i] >= validation_labels[i]-regression_accuracy &&
              CurrentResults[i] <= validation_labels[i]+regression_accuracy)
          {
            int ValidationLabelIndex = MCItemIndicesInContainer(UniqueValidationLabels, validation_labels[i])[0];
            int PredictedLabelIndex = MCItemIndicesInContainer(UniqueValidationLabels, CurrentResults[i])[0];

            if (ValidationLabelIndex >= 0)
            {
              if (PredictedLabelIndex >= 0)
              {
                ValidationResultsTable[ValidationLabelIndex][PredictedLabelIndex]++;
              }
              ValidationLabelSamples[ValidationLabelIndex]++;
            }
          }
        }
      }
    } // End of validation set evaluation
    // Calculate the results of this run
    if (!TestingSamples.empty())
    {
      for (unsigned int i = 0; i < TestingLabelSamples.size(); ++i)
      {
        if (TestingLabelSamples[i] != 0)
          MCMultiplyTableRow(TestingResultsTable, i, (float)100.0 / TestingLabelSamples[i]);
      }
      TestingResultsTableList.push_back(TestingResultsTable);
    }
    if (!validation_samples.empty())
    {
      for (unsigned int i = 0; i < ValidationLabelSamples.size(); ++i)
      {
        if (ValidationLabelSamples[i] != 0)
          MCMultiplyTableRow(ValidationResultsTable, i, (float)100.0 / ValidationLabelSamples[i]);
      }
      ValidationResultsTableList.push_back(ValidationResultsTable);
    }
  }
1637  // Averaging the k-fold cross-validation results
1638  if (cv_type == MA::KFoldCrossValidation)
1639  {
1640  MA::FloatTableList FinalTestingResults;
1641  MA::FloatTableList FinalValidationResults;
1642  MC::FloatTable CurrentTable;
1643  MC::IntList LabelSampleCounts;
1644 
1645  for (unsigned int i = 0; i < TestingResultsTableList.size(); ++i)
1646  {
1647  if (i % (int)cv_parameter == 0)
1648  {
1649  CurrentTable.clear();
1650  LabelSampleCounts = MC::IntList(TestingResultsTableList[0].size(), 0);
1651  }
1652  MCSumTables(CurrentTable, TestingResultsTableList[i], (float)1.0);
1653  // Check how many real label samples we have to average the iterations correctly
1654  for (unsigned int i1 = 0; i1 < TestingResultsTableList[i].size(); ++i1)
1655  {
1656  bool Empty = true;
1657  for (unsigned int i2 = 0; i2 < TestingResultsTableList[i][i1].size(); ++i2)
1658  {
1659  if (TestingResultsTableList[i][i1][i2] != 0.0)
1660  {
1661  Empty = false;
1662  break;
1663  }
1664  }
1665  if (!Empty)
1666  LabelSampleCounts[i1]++;
1667  }
1668  if (i % (int)cv_parameter == (unsigned int)cv_parameter-1)
1669  {
1670  for (unsigned int i1 = 0; i1 < CurrentTable.size(); ++i1)
1671  MCMultiplyTableRow(CurrentTable, i1, (float)1.0 / LabelSampleCounts[i1]);
1672  FinalTestingResults.push_back(CurrentTable);
1673  }
1674  }
1675  if (!validation_samples.empty())
1676  {
1677  for (unsigned int i = 0; i < ValidationResultsTableList.size(); ++i)
1678  {
1679  if (i % (int)cv_parameter == 0)
1680  {
1681  LabelSampleCounts = MC::IntList(ValidationResultsTableList[0].size(), 0);
1682  CurrentTable.clear();
1683  }
1684  MCSumTables(CurrentTable, ValidationResultsTableList[i], (float)1.0);
1685  // Check how many real label samples we have to average the iterations correctly
1686  for (unsigned int i1 = 0; i1 < ValidationResultsTableList[i].size(); ++i1)
1687  {
1688  bool Empty = true;
1689  for (unsigned int i2 = 0; i2 < ValidationResultsTableList[i][i1].size(); ++i2)
1690  {
1691  if (ValidationResultsTableList[i][i1][i2] != 0.0)
1692  {
1693  Empty = false;
1694  break;
1695  }
1696  }
1697  if (!Empty)
1698  LabelSampleCounts[i1]++;
1699  }
1700  if (i % (int)cv_parameter == (unsigned int)cv_parameter-1)
1701  {
1702  for (unsigned int i1 = 0; i1 < CurrentTable.size(); ++i1)
1703  MCMultiplyTableRow(CurrentTable, i1, (float)1.0 / LabelSampleCounts[i1]);
1704  FinalValidationResults.push_back(CurrentTable);
1705  }
1706  }
1707  }
1708  delete [] SampleFolds;
1709  delete [] LabelFolds;
1710  delete [] TrainingSampleIndexFolds;
1711  return MA::CvResultsType(FinalTestingResults, FinalValidationResults, SampleRanks);
1712  }
1713  delete [] SampleFolds;
1714  delete [] LabelFolds;
1715  delete [] TrainingSampleIndexFolds;
1716  return MA::CvResultsType(TestingResultsTableList, ValidationResultsTableList, SampleRanks);
1717 }
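
// Usage sketch (illustrative only, not compiled): one run of 5-fold cross-validation
// with an RBF C-SVM. The sample table and label list are assumed to be filled elsewhere.
#if 0
void CrossValidationSketch(const MC::FloatTable& Samples, const MC::FloatList& Labels)
{
  MAClassifier Classifier(MA::SvmClassifierCRbf, 2, false);
  MA::CvResultsType Results = Classifier.CrossValidate(1, MA::KFoldCrossValidation, 5.0,
                                                       Samples, Labels);
}
#endif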
1718 
1719 
1720 MCBinaryData* MAClassifier::Encode() const
1721 {
1722  // Note: The in-memory binary archive technique is from:
1723  // http://boost.2283326.n4.nabble.com/in-memory-stream-binary-archive-serialization-td2578880.html
1724  std::vector<char> TempBuffer;
1725 
1726  boost::iostreams::stream<boost::iostreams::back_insert_device<std::vector<char> > > OutStream(TempBuffer);
1727  {
1728  boost::archive::binary_oarchive OutputArchive(OutStream);
1729 
1730  OutputArchive << *this;
1731  }
1732  MCBinaryData* BinaryData = new MCBinaryData((int)TempBuffer.size());
1733 
1734  memcpy(BinaryData->GetData(), &TempBuffer[0], TempBuffer.size());
1735  return BinaryData;
1736 }
1737 
1738 
1739 MAClassifier* MAClassifier::Decode(const MCBinaryData& data)
1740 {
1741  MAClassifier* Classifier = nullptr;
1742 
1743  MC_TRY_BEGIN
1744  boost::iostreams::basic_array_source<char> Source((char*)data.GetData(),
1745  (std::size_t)(data.GetSize()));
1746  boost::iostreams::stream<boost::iostreams::basic_array_source<char> > InStream(Source);
1747  boost::archive::binary_iarchive InputArchive(InStream);
1748 
1749  Classifier = new MAClassifier();
1750  InputArchive >> *Classifier;
1751  MC_CATCH_BEGIN
1752  MC_WARNING("Incompatible classifier encoding");
1753  return nullptr;
1754  MC_CATCH_END
1755  return Classifier;
1756 }
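
// Round-trip sketch (illustrative only, not compiled): Encode() packs the classifier
// into an MCBinaryData blob and the static Decode() restores it; the caller owns both
// heap objects, so scoped pointers are used here.
#if 0
void EncodeDecodeSketch(const MAClassifier& Trained)
{
  boost::scoped_ptr<MCBinaryData> Blob(Trained.Encode());
  boost::scoped_ptr<MAClassifier> Restored(MAClassifier::Decode(*Blob));

  if (!Restored)
    MC_WARNING("Decoding failed");
}
#endif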
1757 
1758 
1759 MAModel* MAClassifier::ExportModel() const
1760 {
1761  CvStatModel* Model = nullptr;
1762  cv::Algorithm* Model2 = nullptr;
1763 
1764  if (!Trained)
1765  {
1766  MC_WARNING("Can't export an untrained classifier");
1767  return nullptr;
1768  }
1769  // OpenCV classifiers
1770  if (Classifier >= MA::SvmClassifierCLinear && Classifier <= MA::SvmClassifierNuRbf)
1771  {
1772  Model = SvmClassifier.get();
1773  } else
1774  if (Classifier == MA::NeuralNetworkClassifier)
1775  {
1776  Model = NeuralNetwork.get();
1777  } else
1778  if (Classifier == MA::NaiveBayesClassifier)
1779  {
1780  Model = BayesClassifier.get();
1781  } else
1782  if (Classifier == MA::KNearestNeighborClassifier)
1783  {
1784  Model = KNearestClassifier.get();
1785  } else
1786  if (Classifier == MA::DecisionTreeClassifier)
1787  {
1788  Model = DecisionTree.get();
1789  } else
1790  if (Classifier == MA::RandomTreesClassifier)
1791  {
1792  Model = RandomTrees.get();
1793  } else
1794  if (Classifier == MA::ExtremeRandomTreesClassifier)
1795  {
1796  Model = ExtremeRandomTrees.get();
1797  } else
1798  if (Classifier == MA::GradientBoostedTreesClassifier)
1799  {
1800  Model = GradientBoostedTrees.get();
1801  } else
1802  if (Classifier == MA::ExpectationMaximization)
1803  {
1804  Model2 = EmClassifier->Classifier.get();
1805  }
1806  if (Model || Model2)
1807  {
1808  boost::scoped_ptr<cv::FileStorage> FileStorage;
1809 
1810  FileStorage.reset(new cv::FileStorage(".xml", cv::FileStorage::WRITE+cv::FileStorage::MEMORY));
1811  if (Model)
1812  {
1813  Model->write(**FileStorage, MA::CRMethodTypeStrs[(int)Classifier].c_str());
1814  } else {
1815  cvStartWriteStruct(**FileStorage, MA::CRMethodTypeStrs[(int)Classifier].c_str(), CV_NODE_MAP);
1816  Model2->write(*FileStorage);
1817  cvEndWriteStruct(**FileStorage);
1818  }
1819  *FileStorage << "FeatureCount" << (int)FeatureCount;
1820  MC::FloatList UniqueLabels(CachedUniqueLabels.begin(), CachedUniqueLabels.end());
1821 
1822  *FileStorage << "Labels" << MCEncodeToString(UniqueLabels, true);
1823  if (Classifier == MA::ExpectationMaximization)
1824  {
1825  *FileStorage << "LabelMapping" << MCEncodeToString(EmClassifier->LabelMapping, true);
1826  }
1827  *FileStorage << "PreprocessingMode" << MA::FeaturePreprocessingTypeStrs[(int)Preprocessing];
1828  *FileStorage << "PreprocessingCoefficients" << MCEncodeToString(PreprocessedData, true);
1829  return new MAModel(FileStorage->releaseAndGetString());
1830  }
1831  // Dlib classifiers
1832  std::ostringstream OutputBuffer;
1833 
1834  dlib::serialize(MA::CRMethodTypeStrs[(int)Classifier], OutputBuffer);
1835  if (Classifier == MA::SvmClassifierCLinearEkm || Classifier == MA::SvmClassifierCRbfEkm ||
1836  Classifier == MA::SvmClassifierCLinearDcd || Classifier == MA::SvmClassifierCLinearDlib)
1837  {
1838  DlibDModelSerializeType SaveModel = DlibFunctions->ClassifierDFunction;
1839 
1840  dlib::serialize(SaveModel, OutputBuffer);
1841  } else
1842  if (Classifier == MA::RvmClassifierLinear || Classifier == MA::RvmClassifierSigmoid ||
1843  Classifier == MA::RvmClassifierRbf || Classifier == MA::KrRegressionLinear ||
1844  Classifier == MA::KrRegressionRbf)
1845  {
1846  DlibFModelSerializeType SaveModel = DlibFunctions->ClassifierFFunction;
1847 
1848  dlib::serialize(SaveModel, OutputBuffer);
1849  } else {
1850  MC_WARNING("Unsupported classifier for model export (%s)", MA::CRMethodTypeStrs[(int)Classifier].c_str());
1851  return nullptr;
1852  }
1853  dlib::serialize((int)FeatureCount, OutputBuffer);
1854  MC::FloatList UniqueLabels(CachedUniqueLabels.begin(), CachedUniqueLabels.end());
1855 
1856  dlib::serialize(UniqueLabels, OutputBuffer);
1857  dlib::serialize(MA::FeaturePreprocessingTypeStrs[(int)Preprocessing], OutputBuffer);
1858  dlib::serialize(PreprocessedData, OutputBuffer);
1859  return new MAModel(OutputBuffer.str());
1860 }
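
// Export sketch (illustrative only, not compiled): ExportModel() returns nullptr for an
// untrained classifier or an unsupported method, so the result must always be checked.
#if 0
void ExportSketch(const MAClassifier& Trained)
{
  boost::scoped_ptr<MAModel> Model(Trained.ExportModel());

  if (!Model)
    MC_WARNING("Model export failed or is unsupported for this method");
}
#endif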
1861 
1862 
1863 void MAClassifier::CreateClassifier()
1864 {
1865  if (Classifier == MA::NeuralNetworkClassifier)
1866  {
1867  if (!NeuralNetwork.get())
1868  NeuralNetwork.reset(new CvANN_MLP);
1869  } else
1870  if (Classifier == MA::NaiveBayesClassifier)
1871  {
1872  if (!BayesClassifier.get())
1873  BayesClassifier.reset(new CvNormalBayesClassifier);
1874  } else
1875  if (Classifier == MA::KNearestNeighborClassifier)
1876  {
1877  if (!KNearestClassifier.get())
1878  KNearestClassifier.reset(new CvKNearest);
1879  } else
1880  if (Classifier >= MA::SvmClassifierCLinear && Classifier <= MA::SvmClassifierNuRbf)
1881  {
1882  if (!SvmClassifier.get())
1883  SvmClassifier.reset(new CvSVM);
1884  if (!SvmClassifierParams.get())
1885  SvmClassifierParams.reset(new CvSVMParams);
1886  if (Classifier >= MA::SvmClassifierCLinear && Classifier <= MA::SvmClassifierCRbf)
1887  {
1888  if (!Regression)
1889  SvmClassifierParams->svm_type = CvSVM::C_SVC;
1890  else
1891  SvmClassifierParams->svm_type = CvSVM::EPS_SVR;
1892  }
1893  if (Classifier >= MA::SvmClassifierNuLinear && Classifier <= MA::SvmClassifierNuRbf)
1894  {
1895  if (!Regression)
1896  SvmClassifierParams->svm_type = CvSVM::NU_SVC;
1897  else
1898  SvmClassifierParams->svm_type = CvSVM::NU_SVR;
1899  }
1900  if (Classifier == MA::SvmClassifierCLinear || Classifier == MA::SvmClassifierNuLinear)
1901  SvmClassifierParams->kernel_type = CvSVM::LINEAR;
1902  if (Classifier == MA::SvmClassifierCRbf || Classifier == MA::SvmClassifierNuRbf)
1903  SvmClassifierParams->kernel_type = CvSVM::RBF;
1903  SvmClassifierParams->C = SvmC;
1904  SvmClassifierParams->p = SvmP;
1906  SvmClassifierParams->nu = SvmNu;
1907  SvmClassifierParams->gamma = SvmGamma;
1908  } else
1909  if (Classifier == MA::DecisionTreeClassifier)
1910  {
1911  if (!DecisionTree.get())
1912  DecisionTree.reset(new CvDTree);
1913  } else
1914  if (Classifier == MA::RandomTreesClassifier)
1915  {
1916  if (!RandomTrees.get())
1917  RandomTrees.reset(new MARandomTrees);
1918  } else
1919  if (Classifier == MA::ExtremeRandomTreesClassifier)
1920  {
1921  if (!ExtremeRandomTrees.get())
1922  ExtremeRandomTrees.reset(new CvERTrees);
1923  } else
1924  if (Classifier == MA::GradientBoostedTreesClassifier)
1925  {
1926  if (!GradientBoostedTrees.get())
1927  GradientBoostedTrees.reset(new CvGBTrees);
1928  } else
1929  if (Classifier == MA::ExpectationMaximization)
1930  {
1931  if (!EmClassifier.get())
1932  {
1933  EmClassifier.reset(new CvEmWrapper);
1934  // Note: Default parameter EM::COV_MAT_DIAGONAL is changed to cv::EM::COV_MAT_GENERIC
1935  // because of http://stackoverflow.com/questions/23485982
1936  EmClassifier->Classifier.reset(new cv::EM(LabelCount, cv::EM::COV_MAT_GENERIC));
1937  }
1938 #if defined(__unix__)
1939  } else
1940  if (Classifier == MA::GaussianProcessRegression)
1941  {
1942  if (!Gpr.get())
1943  Gpr.reset(new GpWrapper);
1944 #endif
1945  }
1946 }
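
// Parameter sketch (illustrative only, not compiled): the backend objects above are
// created lazily with defaults, so method parameters can still be tuned through
// SetParameter() before training. The enum names SvmNuParam and SvmGammaParam are
// assumptions for illustration, not confirmed members of MA::CRMethodParamType.
#if 0
void ParameterSketch(MAClassifier& Classifier)
{
  Classifier.SetParameter(MA::SvmNuParam, 0.001f);
  Classifier.SetParameter(MA::SvmGammaParam, 0.0001f);
}
#endif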
1947 
1948 
1949 template<class Archive>
1950 void MAClassifier::load(Archive& archive, const unsigned int version)
1951 {
1952  MC_UNUSED(version);
1953  int TempInt = 0;
1954 
1955  archive & TempInt;
1956  Classifier = (MA::CRMethodType)TempInt;
1957  archive & TempInt;
1958  Preprocessing = (MA::FeaturePreprocessingType)TempInt;
1959  // cppcheck-suppress clarifyCondition
1960  archive & Regression;
1961  archive & LabelCount;
1962  CreateClassifier();
1963  archive & SvmGamma;
1964  archive & SvmC;
1965  archive & SvmP;
1966  archive & SvmNu;
1967  // cppcheck-suppress clarifyCondition
1968  archive & SvmAutoTrain;
1969  archive & DlibEpsilon;
1970  archive & RvmSigmoidGamma;
1971  archive & RvmRbfGamma;
1972  archive & KrrGamma;
1973  archive & KrrLambda;
1974  archive & MeL1;
1975  archive & MeL2;
1976  archive & LwprAlpha;
1977  archive & PlsrComponents;
1978  archive & KrlsTolerance;
1979  archive & KrlsGamma;
1980  archive & TreeMaxDepth;
1981  archive & TreeNodeSampleLimit;
1982  archive & RtMaxForestSize;
1983  // Reset() applies the learning parameters to the freshly created classifier
1984  Reset();
1985  // Feature count must be restored after loading and applying the learning parameters
1986  archive & FeatureCount;
1987  // Load samples and train classifier
1988  archive & CachedSamples;
1989  archive & CachedLabels;
1990  MC::FloatList Labels;
1991 
1992  archive & Labels;
1993  CachedUniqueLabels = std::set<float>(Labels.begin(), Labels.end());
1994  if (Classifier == MA::ExpectationMaximization)
1995  {
1996  archive & EmClassifier->LabelMapping;
1997  }
1998  archive & PrioritizedClasses;
1999  archive & PriorityLabels;
2000  Trained = false;
2001 }
2002 
2003 
2004 template<class Archive>
2005 void MAClassifier::save(Archive& archive, const unsigned int version) const
2006 {
2007  MC_UNUSED(version);
2008  int TempInt = (int)Classifier;
2009 
2010  archive & TempInt;
2011  TempInt = (int)Preprocessing;
2012  archive & TempInt;
2013  // cppcheck-suppress clarifyCondition
2014  archive & Regression;
2015  archive & LabelCount;
2016  archive & SvmGamma;
2017  archive & SvmC;
2018  archive & SvmP;
2019  archive & SvmNu;
2020  // cppcheck-suppress clarifyCondition
2021  archive & SvmAutoTrain;
2022  archive & DlibEpsilon;
2023  archive & RvmSigmoidGamma;
2024  archive & RvmRbfGamma;
2025  archive & KrrGamma;
2026  archive & KrrLambda;
2027  archive & MeL1;
2028  archive & MeL2;
2029  archive & LwprAlpha;
2030  archive & PlsrComponents;
2031  archive & KrlsTolerance;
2032  archive & KrlsGamma;
2033  archive & TreeMaxDepth;
2034  archive & TreeNodeSampleLimit;
2035  archive & RtMaxForestSize;
2036  // Note: Feature count is stored after the learning parameters to mirror the load order
2037  archive & FeatureCount;
2038  archive & CachedSamples;
2039  archive & CachedLabels;
2040  MC::FloatList Labels(CachedUniqueLabels.begin(), CachedUniqueLabels.end());
2041 
2042  archive & Labels;
2043  if (Classifier == MA::ExpectationMaximization)
2044  {
2045  archive & EmClassifier->LabelMapping;
2046  }
2047  archive & PrioritizedClasses;
2048  archive & PriorityLabels;
2049 }
2050 
2051 // Explicit template instantiation
2052 // http://stackoverflow.com/questions/2152002/how-do-i-force-a-particular-instance-of-a-c-template-to-instantiate
2053 // Post by Alexander Poluektov
2054 template void MAClassifier::load<boost::archive::binary_iarchive>(boost::archive::binary_iarchive&,
2055  const unsigned int);
2056 template void MAClassifier::save<boost::archive::binary_oarchive>(boost::archive::binary_oarchive&,
2057  const unsigned int) const;
2058 template void MAClassifier::load<eos::portable_iarchive>(eos::portable_iarchive&, const unsigned int);
2059 template void MAClassifier::save<eos::portable_oarchive>(eos::portable_oarchive&, const unsigned int) const;
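
// Serialization sketch (illustrative only, not compiled): the explicit instantiations
// above let a classifier stream through both the Boost binary archives and the portable
// EOS archives, e.g. when writing to an in-memory std::ostringstream.
#if 0
void ArchiveSketch(const MAClassifier& Classifier, std::ostream& output)
{
  eos::portable_oarchive Archive(output);

  Archive << Classifier;  // Dispatches to MAClassifier::save() via boost::serialization
}
#endif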