Main Page · Modules · All Classes · Class Hierarchy
MASoundData.cpp
1 /*
2  * This file is part of the AiBO+ project
3  *
4  * Copyright (C) 2005-2016 Csaba Kertész (csaba.kertesz@gmail.com)
5  *
6  * AiBO+ is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; either version 2 of the License, or
9  * (at your option) any later version.
10  *
11  * AiBO+ is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Street #330, Boston, MA 02111-1307, USA.
19  *
20  */
21 
22 #include "MASoundData.hpp"
23 
24 #include "MALossyConverter.hpp"
25 
26 #include <MCSampleStatistics.hpp>
27 
28 #include <libresample.h>
29 
30 #include <WaveFile.h>
31 
32 #include <boost/algorithm/string/predicate.hpp>
33 #include <boost/scoped_ptr.hpp>
34 
// Sample rate for audio processing on AIBO; LoadFromFile() resamples the loaded channels to this rate
35 const int MASoundData::SampleRateOnAIBO = 16000;
36 
37 int MASoundData::GetFileDuration(const std::string& file_name)
38 {
39  WaveFile AudioFile(file_name);
40 
41  if (AudioFile.GetBitsPerSample() != 16 || (int)AudioFile.GetAudioFormat() != 1 ||
42  AudioFile.GetNumChannels() < 1 || AudioFile.GetNumChannels() > 2)
43  {
44  MC_WARNING("Only 16 bit mono or stereo PCM Wave files are supported (file: %s)!", file_name.c_str());
45  return 0;
46  }
47  float Divider = (float)2*AudioFile.GetNumChannels()*AudioFile.GetSampleRate() / 1000;
48 
49  return (int)((float)MCGetFileSize(file_name) / Divider);
50 }
51 
52 
53 bool MASoundData::LoadFromFile(const std::string& file_name, MC::DoubleList& left_channel,
54  MC::DoubleList& right_channel, int compression, int bitrate)
55 {
56  // Read the audio data
57  MC::BinaryDataSPtr FileData(new MCBinaryData());
58  int FileSampleRate = 0;
59  int Channels = 0;
60 
61  if (boost::algorithm::iends_with(file_name, ".wav"))
62  {
63  WaveFile AudioFile(file_name);
64 
65  if (AudioFile.GetBitsPerSample() != 16 || (int)AudioFile.GetAudioFormat() != 1 ||
66  AudioFile.GetNumChannels() < 1 || AudioFile.GetNumChannels() > 2 ||
67  AudioFile.GetSampleRate() == 0)
68  {
69  MC_WARNING("Only 16 bit mono or stereo PCM Wave files are supported (file: %s)!", file_name.c_str());
70  return false;
71  }
72  FileSampleRate = (int)AudioFile.GetSampleRate();
73  Channels = AudioFile.GetNumChannels();
74  FileData->Allocate(AudioFile.GetDataSize());
75  memcpy(FileData->GetData(), AudioFile.GetData(), (int)AudioFile.GetDataSize());
76  } else
77  if (boost::algorithm::iends_with(file_name, ".ogg") || boost::algorithm::iends_with(file_name, ".mp3"))
78  {
79  MC::BinaryDataSPtr EncodedData(new MCBinaryData);
80 
81  EncodedData->LoadFromFile(file_name);
82  if (boost::algorithm::iends_with(file_name, ".ogg"))
83  {
84  FileData.reset(MALossyConverter::DecodeFromOggVorbis(*EncodedData, FileSampleRate, Channels));
85  } else
86  if (boost::algorithm::iends_with(file_name, ".mp3"))
87  {
88  FileData.reset(MALossyConverter::DecodeFromMp3(*EncodedData, 0, FileSampleRate, Channels));
89  }
90  if (Channels < 1 || Channels > 2 || FileSampleRate == 0)
91  {
92  MC_WARNING("Only mono or stereo Ogg Vorbis/mp3 files are supported (file: %s)!", file_name.c_str());
93  return false;
94  }
95  }
96  // Convert audio data to double
97  MC::BinaryDataSPtr CompressedFileData;
98  MC::DoubleList FileDataDouble;
99 
100  if (compression == 1)
101  {
102  CompressedFileData.reset(MALossyConverter::EncodeToOggVorbis(*FileData, FileSampleRate,
103  Channels, bitrate));
104  if (!CompressedFileData.get())
105  {
106  MC_WARNING("Lossy Ogg Vorbis encoding failed: %s\n", file_name.c_str());
107  } else {
108  FileData.reset(MALossyConverter::DecodeFromOggVorbis(*CompressedFileData, FileSampleRate, Channels));
109  }
110  } else
111  if (compression == 2)
112  {
113  CompressedFileData.reset(MALossyConverter::EncodeToMp3(*FileData, FileSampleRate,
114  Channels, bitrate));
115  if (!CompressedFileData.get())
116  {
117  MC_WARNING("Lossy mp3 encoding failed: %s\n", file_name.c_str());
118  } else {
119  FileData.reset(MALossyConverter::DecodeFromMp3(*CompressedFileData, FileData->GetSize(),
120  FileSampleRate, Channels));
121  }
122  }
123  FileDataDouble = MASoundData::ConvertToDouble(*FileData);
124  // Split the channels if needs be
125  if (Channels == 1)
126  {
127  left_channel = FileDataDouble;
128  right_channel.clear();
129  } else {
130  left_channel = MCNthItemsFromContainer(FileDataDouble, 1, 2);
131  right_channel = MCNthItemsFromContainer(FileDataDouble, 2, 2);
132  }
133  FileDataDouble.clear();
134  // Resample the channels if needs be
135  if (FileSampleRate != SampleRateOnAIBO)
136  {
137  left_channel = MASoundData::Resample(left_channel, FileSampleRate, SampleRateOnAIBO);
138  if (!right_channel.empty())
139  right_channel = MASoundData::Resample(right_channel, FileSampleRate, SampleRateOnAIBO);
140  }
141  return true;
142 }
143 
144 
145 MC::DoubleList MASoundData::ConvertToDouble(const MCBinaryData& raw_data)
146 {
147  MC::DoubleList NewData;
148 
149  NewData.resize(raw_data.GetSize() / 2);
150  for (int i = 0; i < raw_data.GetSize() / 2; ++i)
151  {
152  NewData[i] = (double)(((int)raw_data.GetData()[i*2+1] << 8) | (int)raw_data.GetData()[i*2]) / ((float)0x8000);
153  if (NewData[i] > 1)
154  NewData[i] -= 2;
155  }
156  return NewData;
157 }
158 
159 
160 MCBinaryData* MASoundData::ConvertDoubleToRaw(MC::DoubleList& left_channel, MC::DoubleList& right_channel)
161 {
162  MCBinaryData* RawData = nullptr;
163  const unsigned int Count = MCMax(left_channel.size(), right_channel.size());
164 
165  RawData = new MCBinaryData((int)(left_channel.size()+right_channel.size())*2);
166  for (unsigned int i = 0; i < Count; ++i)
167  {
168  double Data = 0;
169 
170  if (i < left_channel.size())
171  {
172  Data = left_channel[i];
173  if (Data < 0)
174  Data += 2;
175  }
176  RawData->AddUChar((unsigned char)((int)(Data*32768) % 256));
177  RawData->AddUChar((unsigned char)((int)(Data*32768) / 256));
178 
179  if (right_channel.size() == 0)
180  continue;
181 
182  Data = 0;
183  if (i < right_channel.size())
184  {
185  Data = right_channel[i];
186  if (Data < 0)
187  Data += 2;
188  }
189  RawData->AddUChar((unsigned char)((int)(Data*32768) % 256));
190  RawData->AddUChar((unsigned char)((int)(Data*32768) / 256));
191  }
192  return RawData;
193 }
194 
195 
196 void MASoundData::RemoveLeadingTrailingSilence(MC::DoubleList& audio_data, unsigned int frequency)
197 {
198  int WindowSize = 10*(int)frequency / 10000;
199  MCSampleStatistic<double>* PowerStatistic = new MCMedian<double>;
200  boost::scoped_ptr<MCSamples<double> > PowerSamples(new MCSamples<double>(WindowSize, *PowerStatistic));
201 
202  while ((int)audio_data.size() > WindowSize*2)
203  {
204  int DataSize = audio_data.size();
205  float LeadingPower = 0;
206  float TrailingPower = 0;
207 
208  for (int i = 0; i < WindowSize; ++i)
209  {
210  *PowerSamples << audio_data[i];
211  }
212  LeadingPower = PowerStatistic->GetResult();
213  for (int i = 0; i < WindowSize; ++i)
214  {
215  *PowerSamples << audio_data[DataSize-i];
216  }
217  TrailingPower = PowerStatistic->GetResult();
218  if (LeadingPower >= 10 && TrailingPower >= 10)
219  return;
220  if (LeadingPower <= 10)
221  audio_data.erase(audio_data.begin(), audio_data.begin()+WindowSize);
222  if (TrailingPower <= 10)
223  audio_data.erase(audio_data.end()-WindowSize, audio_data.end());
224  }
225 }
226 
227 
228 MC::DoubleList MASoundData::Resample(const MC::DoubleList& audio_data, int original_frequency, int new_frequency)
229 {
230  if (audio_data.empty())
231  return MC::DoubleList();
232 
233  // Convert the audio data to float
234  MC::FloatList OldData;
235  double Factor = (double)new_frequency / original_frequency;
236 
237  OldData.resize(audio_data.size());
238  for (unsigned int i = 0; i < OldData.size(); ++i)
239  {
240  OldData[i] = (float)audio_data[i];
241  }
242  // Resample the data
243  int MaxDataSize = (int)(ceil((float)audio_data.size()*Factor)+100);
244  MC::FloatList ResampleData;
245  MC::DoubleList FinalData;
246  void* handle = resample_open(1, Factor, Factor);
247  int UsedBytes = 0, OutputCount = 0;
248 
249  ResampleData.resize(MaxDataSize);
250  OutputCount = resample_process(handle, Factor, &OldData[0], OldData.size(), 0, &UsedBytes,
251  &ResampleData[0], ResampleData.size());
252  resample_close(handle);
253  FinalData.resize(OutputCount);
254  for (unsigned int i = 0; i < FinalData.size(); ++i)
255  {
256  FinalData[i] = (float)ResampleData[i];
257  }
258  return FinalData;
259 }
260 
261 
262 unsigned int MASoundData::GetWindowSize(unsigned int frequency)
263 {
264  unsigned int WindowSize = 0;
265 
266  for (WindowSize = 1; WindowSize < frequency*30 / 1000; WindowSize *= 2)
267  { }
268  return WindowSize;
269 }
270 
271 
272 bool MASoundData::Join(const MCBinaryData& left_channel, const MCBinaryData& right_channel,
273  MCBinaryData& stereo)
274 {
275  if (left_channel.IsEmpty() || right_channel.IsEmpty() ||
276  left_channel.GetSize() != right_channel.GetSize() || left_channel.GetSize() % 2 != 0)
277  {
278  MC_WARNING("The data count of the channels are not equal and divisible by 2 (left: %d, right: %d)",
279  left_channel.GetSize(), right_channel.GetSize());
280  return false;
281  }
282  stereo.Allocate(left_channel.GetSize()*2);
283  char* StereoDataPtr = (char*)stereo.GetData();
284  char* LeftChannelDataPtr = (char*)left_channel.GetData();
285  char* RightChannelDataPtr = (char*)right_channel.GetData();
286 
287  for (int i = 0; i < stereo.GetSize() / 4; ++i)
288  {
289  memcpy(StereoDataPtr, LeftChannelDataPtr, 2);
290  memcpy(StereoDataPtr+2, RightChannelDataPtr, 2);
291  LeftChannelDataPtr += 2;
292  RightChannelDataPtr += 2;
293  StereoDataPtr += 4;
294  }
295  return true;
296 }
297 
298 
299 float MASoundData::CalculatePowerDb(const MC::DoubleList& audio_data)
300 {
301  if (audio_data.empty())
302  return 0;
303 
304  // This calculation is based on the Audacity sources and it was verified against
305  // the SNRs of the NAR dataset (https://team.inria.fr/perception/nard) published in
306  // Maxime Janvier (2014): Sound Representation and Classification Benchmark for Domestic Robots
307  float Max = MCCalculateVectorStatistic(audio_data, *new MCMaximum<double>);
308  float Min = MCCalculateVectorStatistic(audio_data, *new MCMinimum<double>);
309  float Peak = (MCAbs(Min) > MCAbs(Max) ? MCAbs(Min) : MCAbs(Max));
310 
311  return 20*log10(Peak);
312 }
313 
314 
315 float MASoundData::CalculateSnr(const float signal_power_db, const float noise_power_db)
316 {
317  if (MCIsFloatInfinity(signal_power_db) || MCIsFloatInfinity(noise_power_db))
318  return MCFloatInfinity();
319 
320  return signal_power_db-noise_power_db;
321 }
static float CalculatePowerDb(const MC::DoubleList &audio_data)
Calculate the power level of the signal in dB.
Minimum statistic.
static MCBinaryData * ConvertDoubleToRaw(MC::DoubleList &left_channel, MC::DoubleList &right_channel)
Convert double lists back to raw audio data.
static MCBinaryData * EncodeToMp3(const MCBinaryData &raw_data, const int sample_rate, const int channels, const int quality)
Encode raw audio data to mp3.
bool IsEmpty() const
Check if the binary data is not filled with invalid data.
static unsigned int GetWindowSize(unsigned int frequency)
Get window size for a given frequency.
static const int SampleRateOnAIBO
Sample rate for audio processing on AIBO.
Binary data class.
static void RemoveLeadingTrailingSilence(MC::DoubleList &audio_data, unsigned int sample_rate)
Remove leading and trailing silences from the audio data.
static int GetFileDuration(const std::string &file_name)
Get audio duration of a file.
Definition: MASoundData.cpp:37
int MCGetFileSize(const std::string &file_name)
Get file size.
Definition: MCDefs.cpp:239
virtual float GetResult()=0
Get the current result of the statistic.
float MCCalculateVectorStatistic(const std::vector< T > &vector, MCSampleStatistic< T > &statistic)
Calculate a statistic over a vector.
#define MC_WARNING(...)
Warning macro.
Definition: MCLog.hpp:43
static MCBinaryData * DecodeFromOggVorbis(MCBinaryData &ogg_data, int &sample_rate, int &channels)
Decode Ogg Vorbis data to raw audio.
static bool Join(const MCBinaryData &left_channel, const MCBinaryData &right_channel, MCBinaryData &stereo)
Join two channels into a stereo track.
T MCAbs(const T &value)
Calculate absolute value.
Definition: MCDefs.hpp:399
void Allocate(int size)
Allocate a certain data size.
static float CalculateSnr(const float signal_power_db, const float noise_power_db)
Calculate signal-to-noise ratio (SNR)
static MC::DoubleList ConvertToDouble(const MCBinaryData &raw_data)
Convert audio data to a double list.
Sample statistic base class.
Definition: MCSamples.hpp:30
bool MCIsFloatInfinity(const float value)
Check a value for float infinity.
Definition: MCDefs.cpp:122
static MC::DoubleList Resample(const MC::DoubleList &audio_data, int original_frequency, int new_frequency)
Resample audio data.
static bool LoadFromFile(const std::string &file_name, MC::DoubleList &left_channel, MC::DoubleList &right_channel, int compression=0, int bitrate=0)
Load audio data from a wav, Ogg Vorbis or mp3 file.
Definition: MASoundData.cpp:53
static MCBinaryData * DecodeFromMp3(MCBinaryData &mp3_data, unsigned int orig_size, int &sample_rate, int &channels)
Decode mp3 data to raw audio.
Maximum statistic.
U MCNthItemsFromContainer(const U &container, unsigned int n_th, unsigned int period)
Get items from n-th indexes of a container.
unsigned char * GetData() const
Get direct access to the binary data.
float MCFloatInfinity()
Get float infinity.
Definition: MCDefs.cpp:110
const T MCMax(const U &container)
Get the maximal value of a container.
static MCBinaryData * EncodeToOggVorbis(const MCBinaryData &raw_data, const int sample_rate, const int channels, const int quality)
Encode raw audio data to Ogg Vorbis.
Median statistic.
int GetSize() const
Get binary data size.